From 0c9a2d8847717535af1ef868e1f978e87fb1b3ab Mon Sep 17 00:00:00 2001 From: zachjweiner Date: Mon, 23 Sep 2019 22:16:01 -0500 Subject: [PATCH] initial release --- .gitignore | 20 + .readthedocs.yml | 21 + LICENSE | 21 + README.rst | 55 ++ doc/Makefile | 19 + doc/_static/copybutton.js | 65 ++ doc/changes.rst | 7 + doc/citing.rst | 12 + doc/conf.py | 176 ++++ doc/faq.rst | 4 + doc/index.rst | 46 + doc/installation.rst | 108 ++ doc/license.rst | 24 + doc/make.bat | 35 + doc/ref_codegen.rst | 23 + doc/ref_fourier.rst | 29 + doc/ref_multigrid.rst | 19 + doc/ref_numerics.rst | 14 + doc/ref_other.rst | 23 + doc/requirements.txt | 1 + environment.yml | 21 + examples/codegen-tutorial.ipynb | 1634 +++++++++++++++++++++++++++++++ examples/phi_chi.py | 182 ++++ examples/wave-equation.py | 45 + pystella/__init__.py | 136 +++ pystella/decomp.py | 585 +++++++++++ pystella/derivs.py | 360 +++++++ pystella/elementwise.py | 220 +++++ pystella/expansion.py | 170 ++++ pystella/field/__init__.py | 375 +++++++ pystella/field/diff.py | 89 ++ pystella/field/sympy.py | 143 +++ pystella/fourier/__init__.py | 37 + pystella/fourier/derivs.py | 161 +++ pystella/fourier/dft.py | 440 +++++++++ pystella/fourier/projectors.py | 343 +++++++ pystella/fourier/rayleigh.py | 395 ++++++++ pystella/fourier/spectra.py | 325 ++++++ pystella/multigrid/__init__.py | 493 ++++++++++ pystella/multigrid/relax.py | 376 +++++++ pystella/multigrid/transfer.py | 265 +++++ pystella/output.py | 177 ++++ pystella/reduction.py | 356 +++++++ pystella/sectors.py | 319 ++++++ pystella/stencil.py | 136 +++ pystella/step.py | 605 ++++++++++++ run_tests.sh | 8 + setup.cfg | 14 + setup.py | 66 ++ test/common.py | 63 ++ test/conftest.py | 22 + test/test_decomp.py | 196 ++++ test/test_derivs.py | 165 ++++ test/test_dft.py | 114 +++ test/test_elementwise.py | 104 ++ test/test_energy.py | 110 +++ test/test_examples.py | 61 ++ test/test_expansion.py | 82 ++ test/test_field.py | 213 ++++ test/test_multigrid.py | 122 +++ 
test/test_projectors.py | 392 ++++++++ test/test_rayleigh.py | 169 ++++ test/test_reduction.py | 200 ++++ test/test_relax.py | 139 +++ test/test_spectra.py | 198 ++++ test/test_stencil.py | 104 ++ test/test_step.py | 118 +++ test/test_transfer.py | 125 +++ 68 files changed, 11895 insertions(+) create mode 100644 .gitignore create mode 100644 .readthedocs.yml create mode 100644 LICENSE create mode 100644 README.rst create mode 100644 doc/Makefile create mode 100644 doc/_static/copybutton.js create mode 100644 doc/changes.rst create mode 100644 doc/citing.rst create mode 100644 doc/conf.py create mode 100644 doc/faq.rst create mode 100644 doc/index.rst create mode 100644 doc/installation.rst create mode 100644 doc/license.rst create mode 100644 doc/make.bat create mode 100644 doc/ref_codegen.rst create mode 100644 doc/ref_fourier.rst create mode 100644 doc/ref_multigrid.rst create mode 100644 doc/ref_numerics.rst create mode 100644 doc/ref_other.rst create mode 100644 doc/requirements.txt create mode 100644 environment.yml create mode 100644 examples/codegen-tutorial.ipynb create mode 100644 examples/phi_chi.py create mode 100644 examples/wave-equation.py create mode 100644 pystella/__init__.py create mode 100644 pystella/decomp.py create mode 100644 pystella/derivs.py create mode 100644 pystella/elementwise.py create mode 100644 pystella/expansion.py create mode 100644 pystella/field/__init__.py create mode 100644 pystella/field/diff.py create mode 100644 pystella/field/sympy.py create mode 100644 pystella/fourier/__init__.py create mode 100644 pystella/fourier/derivs.py create mode 100644 pystella/fourier/dft.py create mode 100644 pystella/fourier/projectors.py create mode 100644 pystella/fourier/rayleigh.py create mode 100644 pystella/fourier/spectra.py create mode 100644 pystella/multigrid/__init__.py create mode 100644 pystella/multigrid/relax.py create mode 100644 pystella/multigrid/transfer.py create mode 100644 pystella/output.py create mode 100644 
pystella/reduction.py create mode 100644 pystella/sectors.py create mode 100644 pystella/stencil.py create mode 100644 pystella/step.py create mode 100644 run_tests.sh create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 test/common.py create mode 100644 test/conftest.py create mode 100644 test/test_decomp.py create mode 100644 test/test_derivs.py create mode 100644 test/test_dft.py create mode 100644 test/test_elementwise.py create mode 100644 test/test_energy.py create mode 100644 test/test_examples.py create mode 100644 test/test_expansion.py create mode 100644 test/test_field.py create mode 100644 test/test_multigrid.py create mode 100644 test/test_projectors.py create mode 100644 test/test_rayleigh.py create mode 100644 test/test_reduction.py create mode 100644 test/test_relax.py create mode 100644 test/test_spectra.py create mode 100644 test/test_stencil.py create mode 100644 test/test_step.py create mode 100644 test/test_transfer.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba8896f --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +# blacklist all +/* +# whitelist directories +!/* +# now whitelist stuff +!.gitignore +!**.py +!README.md +!*.cfg +# blacklist +build +dist +*.pyc +*.pyo +*.egg-info +*/_git_rev.py +doc/_build +.vscode +*.h5 +.pytest_cache \ No newline at end of file diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..326c9f9 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,21 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: doc/conf.py + +# Non-HTML is useless until the docs are much longer +formats: [] + +python: + version: 3.6 + install: + - method: setuptools + path: . 
+ - requirements: doc/requirements.txt + system_packages: true diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..5ab38b5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Zachary J Weiner + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..590238d --- /dev/null +++ b/README.rst @@ -0,0 +1,55 @@ +pystella: a distributed and accelerated framework for PDE solving +================================================================= + +.. image:: https://readthedocs.org/projects/pystella/badge/?version=latest + :target: https://pystella.readthedocs.io/en/latest/?badge=latest + :alt: Documentation Status + +``pystella`` enables the easy expression of PDE systems and the algorithms to solve +them in high-performance computing environments within Python. 
+It provides interfaces to generate custom computational kernels +via `loopy `_ which are executed +on (multiple) CPUs or GPUs using +`pyopencl `_ +and `mpi4py `_. +Moreover, ``pystella`` implements a number of algorithms for PDE time evolution +and spatial discretization which can be readily applied to a variety of physical +systems. + +Its features include: + +* code generation for performant element-wise kernels, stencil-based computations, + and reductions +* distributed domain decomposition and grid boundary sychronization +* time-stepping algorithms, including low-storage Runge-Kutta schemes +* finite-difference and spectral-collocation methods for spatial derivatives +* wrappers to OpenCL-based Fast Fourier Transforms (FFTs) and distributed, + CPU FFTs +* methods for field analysis in Fourier space + +All of the above functionality is configured to run at high performance, as are +the interfaces for generating custom kernels by default (though this is +entirely user-configurable!). +Additionally, the provided functionality is intended to work seamlessly whether +running in distributed- (i.e., multiple devices) or shared-memory +(i.e., a single device) contexts, without sacrificing performance in either case. + +``pystella`` was designed for lattice field theory simulations of *preheating* after +cosmological inflation and provides functionality for the simple specification +of physical models of this process (as well as computing the resulting gravitational +wave emission). +However, ``pystella`` is much more generic; these components can be viewed +as examples for the symbolic representation of arbitrary physical systems as an +interface to its code generation routines. +Most systems discretized onto cartesian grids should be entirely within scope +(e.g., lattice field theory, (magneto)hydrodynamics, Einstein's equations, +electromagnetism, etc.). 
+``pystella`` provides entrypoints at varying levels of abstraction---so if you like +the idea of ``pystella`` but the algorithms you require are not implemented, +you can create new interfaces (or extend existing ones) for your purposes +with relative ease. +(Better yet, consider contributing a PR!) + +``pystella`` is `fully documented `_ +and is licensed under the liberal `MIT license +`_. diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..298ea9e --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/doc/_static/copybutton.js b/doc/_static/copybutton.js new file mode 100644 index 0000000..e567775 --- /dev/null +++ b/doc/_static/copybutton.js @@ -0,0 +1,65 @@ +// Copyright 2014 PSF. Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +// File originates from the cpython source found in Doc/tools/sphinxext/static/copybutton.js + +$(document).ready(function() { + /* Add a [>>>] button on the top-right corner of code samples to hide + * the >>> and ... prompts and the output and thus make the code + * copyable. 
*/ + var div = $('.highlight-python .highlight,' + + '.highlight-default .highlight,' + + '.highlight-python3 .highlight') + var pre = div.find('pre'); + + // get the styles from the current theme + pre.parent().parent().css('position', 'relative'); + var hide_text = 'Hide the prompts and output'; + var show_text = 'Show the prompts and output'; + var border_width = pre.css('border-top-width'); + var border_style = pre.css('border-top-style'); + var border_color = pre.css('border-top-color'); + var button_styles = { + 'cursor':'pointer', 'position': 'absolute', 'top': '0', 'right': '0', + 'border-color': border_color, 'border-style': border_style, + 'border-width': border_width, 'color': border_color, 'text-size': '75%', + 'font-family': 'monospace', 'padding-left': '0.2em', 'padding-right': '0.2em', + 'border-radius': '0 3px 0 0' + } + + // create and add the button to all the code blocks that contain >>> + div.each(function(index) { + var jthis = $(this); + if (jthis.find('.gp').length > 0) { + var button = $('>>>'); + button.css(button_styles) + button.attr('title', hide_text); + button.data('hidden', 'false'); + jthis.prepend(button); + } + // tracebacks (.gt) contain bare text elements that need to be + // wrapped in a span to work with .nextUntil() (see later) + jthis.find('pre:has(.gt)').contents().filter(function() { + return ((this.nodeType == 3) && (this.data.trim().length > 0)); + }).wrap(''); + }); + + // define the behavior of the button when it's clicked + $('.copybutton').click(function(e){ + e.preventDefault(); + var button = $(this); + if (button.data('hidden') === 'false') { + // hide the code output + button.parent().find('.go, .gp, .gt').hide(); + button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden'); + button.css('text-decoration', 'line-through'); + button.attr('title', show_text); + button.data('hidden', 'true'); + } else { + // show the code output + button.parent().find('.go, .gp, .gt').show(); + 
button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible'); + button.css('text-decoration', 'none'); + button.attr('title', hide_text); + button.data('hidden', 'false'); + } + }); +}); diff --git a/doc/changes.rst b/doc/changes.rst new file mode 100644 index 0000000..47b0c11 --- /dev/null +++ b/doc/changes.rst @@ -0,0 +1,7 @@ +User-visible Changes +==================== + +Version 2019.5 +-------------- + +* Initial release. diff --git a/doc/citing.rst b/doc/citing.rst new file mode 100644 index 0000000..eee8d9b --- /dev/null +++ b/doc/citing.rst @@ -0,0 +1,12 @@ +Citing pystella +=============== + +If you use :mod:`pystella` for your work, please cite the following pair of articles:: + + .. + +Here are Bibtex entries for your convenience:: + + @article{} + + @article{} diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 0000000..e101765 --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,176 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# http://www.sphinx-doc.org/en/master/config + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + + +# -- Project information ----------------------------------------------------- + +project = 'pystella' +copyright = '2019, Zachary J Weiner' +author = 'Zachary J Weiner' + +import pkg_resources +version = pkg_resources.get_distribution('pystella').version +release = version + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. 
They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.intersphinx', + 'sphinx.ext.linkcode', + 'sphinx.ext.ifconfig', + # 'sphinx_copybutton' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] + +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None), + 'numpy': ('https://docs.scipy.org/doc/numpy/', None), + 'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None), + 'loopy': ('https://documen.tician.de/loopy', None), + 'pymbolic': ('https://documen.tician.de/pymbolic', None), + 'pyopencl': ('https://documen.tician.de/pyopencl', None), + 'mpi4py': ('https://mpi4py.readthedocs.io/en/stable/', None), + 'h5py': ('http://docs.h5py.org/en/stable/', None), + 'mpi4py_fft': ('https://mpi4py-fft.readthedocs.io/en/latest/', None), + } + +latex_elements = { + 'maxlistdepth': '99', +} + +autodoc_mock_imports = ['sympy', 'h5py'] + +import os +on_rtd = os.environ.get('READTHEDOCS') == 'True' + +if on_rtd: + exclude_patterns = ['*multigrid*'] + + +# setup copy button thing +def setup(app): + app.add_config_value('on_rtd', on_rtd, 'env') + app.add_javascript('copybutton.js') + + +# # Resolve function for the linkcode extension. +# def linkcode_resolve(domain, info): +# def find_source(): +# # try to find the file and line number, based on code from numpy: +# # https://github.com/numpy/numpy/blob/master/doc/source/conf.py#L286 +# import sys +# obj = sys.modules[info['module']] +# for part in info['fullname'].split('.'): +# obj = getattr(obj, part) +# import inspect +# import os +# fn = inspect.getsourcefile(obj) +# import pystella +# fn = os.path.relpath(fn, start=os.path.dirname(pystella.__file__)) +# source, lineno = inspect.getsourcelines(obj) +# return fn, lineno, lineno + len(source) - 1 + +# if domain != 'py' or not info['module']: +# return None +# try: +# filename = 'pystella/%s#L%d-L%d' % find_source() +# except Exception: +# filename = info['module'].replace('.', '/') + '.py' +# tag = 'master' # if 'dev' in release else ('v' + release) +# return "https://github.com/zachjweiner/pystella/blob/%s/%s" % (tag, filename) + + +def linkcode_resolve(domain, info): + """ + Determine the URL corresponding to Python 
object + copied from numpy's conf.py + """ + if domain != 'py': + return None + + import sys + import inspect + from os.path import relpath, dirname + + modname = info['module'] + fullname = info['fullname'] + + submod = sys.modules.get(modname) + if submod is None: + return None + + obj = submod + for part in fullname.split('.'): + try: + obj = getattr(obj, part) + except Exception: + return None + + # strip decorators, which would resolve to the source of the decorator + # possibly an upstream bug in getsourcefile, bpo-1764286 + try: + unwrap = inspect.unwrap + except AttributeError: + pass + else: + obj = unwrap(obj) + + try: + fn = inspect.getsourcefile(obj) + except Exception: + fn = None + if not fn: + return None + + try: + source, lineno = inspect.getsourcelines(obj) + except Exception: + lineno = None + + if lineno: + linespec = "#L%d-L%d" % (lineno, lineno + len(source) - 1) + else: + linespec = "" + + import pystella + fn = relpath(fn, start=dirname(pystella.__file__)) + + return "https://github.com/zachjweiner/pystella/blob/master/pystella/%s%s" % ( + fn, linespec) diff --git a/doc/faq.rst b/doc/faq.rst new file mode 100644 index 0000000..c296e61 --- /dev/null +++ b/doc/faq.rst @@ -0,0 +1,4 @@ +Frequently Asked Questions +========================== + +Coming soon. diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 0000000..b30aa8c --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,46 @@ +Welcome to pystella's documentation! +==================================== + +:mod:`pystella` is a package allowing the easy expression and evolution of PDE +systems with finite-difference methods. +Here's a simple example which evolves the scalar wave equation (without doing +anything interesting): + +.. literalinclude:: ../examples/wave-equation.py + +:mod:`pystella` uses :mod:`loopy` for code generation and :mod:`pyopencl` for +execution on CPUs and GPUs. 
+The above example can even be run in a jupyter notebook, but :mod:`pystella` also +supports MPI parallelization across multiple GPUs (or devices, more generally) via +:mod:`mpi4py`. + +For a more detailed tutorial on the tools to generate OpenCL kernels provided by +:mod:`loopy` and :mod:`pystella`, see +`codegen-tutorial.ipynb `_. + +Table of Contents +----------------- + +Please check :ref:`installation` to get started. + +.. toctree:: + :maxdepth: 2 + + installation + ref_codegen + ref_numerics + ref_fourier + ref_other + ref_multigrid + changes + license + faq + citing + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/doc/installation.rst b/doc/installation.rst new file mode 100644 index 0000000..087a16f --- /dev/null +++ b/doc/installation.rst @@ -0,0 +1,108 @@ +.. highlight:: sh + +.. _installation: + +Installation +============ + +At the bare minimum, :mod:`pystella` requires :mod:`numpy`, +:mod:`loopy` for code generation, and :mod:`pyopencl` +(plus an OpenCL implementation) for kernel execution. +Optional dependencies (and what they are needed for) are: + +* :mod:`mpi4py` (and an MPI implementation) for distributed, multi-device execution + +* :mod:`gpyfft` (and :mod:`clfft` and :mod:`Cython`) for OpenCL + Fast Fourier Transforms (:class:`pystella.fourier.gDFT`) (e.g., to run on a GPU), + and/or :mod:`mpi4py_fft` (and :mod:`fftw`) for distributed, CPU FFTs + (:class:`pystella.fourier.pDFT`) + +* :mod:`h5py` (and :mod:`hdf5`) to use the convenience class + :class:`pystella.output.OutputFile` + +* :mod:`sympy`, for interoperability between :mod:`pymbolic` and :mod:`sympy` + +Fortunately, :mod:`conda` greatly simplifies the installation process with any +of these dependencies. +The included :file:`environment.yml` file provides a complete +installation by default, but one can delete any optional dependencies. 
+ +Note that installation has only been tested on Linux, but similar steps should work +on macOS. + +Installation steps +------------------ + +Install via the following steps +(first modifying :file:`environment.yml` as desired): + +1. Install `miniconda `_ (if you + haven't already installed :mod:`conda`). + +2. Clone the repository:: + + git clone https://github.com/zachjweiner/pystella.git + +3. Create a :mod:`pystella` environment as specified by :file:`environment.yml`:: + + conda env create --file pystella/environment.yml + + - This will clone and install (i.e., as if via + :command:`python setup.py develop`) :mod:`gpyfft` and :mod:`loopy` into + :command:`src/`. To change this, first define the environment variable + :command:`PIP_SRC` to be your desired directory, + e.g., to your home directory with:: + + export PIP_SRC=~ + + Alternatively, update your active environment via:: + + conda env update --file pystella/environment.yml + +4. Activate the environment (if you created a new one):: + + conda activate pystella + + and set up :mod:`pystella`:: + + cd pystella/ && python setup.py develop + +To test that installation was successful, try running an example +(e.g., :code:`python examples/scalar-preheating.py`) or run the tests with :mod:`pytest`. + +Running on other devices (GPUs, etc.) +------------------------------------- + +The included :file:`environment.yml` installs `pocl `__, +which provides an OpenCL implementation on most CPUs. +Enabling execution on other hardware (e.g., GPUs) requires making :mod:`pyopencl` +aware of the corresponding OpenCL driver. +See :mod:`pyopencl`'s +`instructions `__ +(specifically, +`here `__). 
+For example, installing `CUDA `__ +installs the driver for NVIDIA GPUs; one must then merely copy +the :file:`nvidia.icd` file via:: + + cp /etc/OpenCL/vendors/nvidia.icd $CONDA_PREFIX/etc/OpenCL/vendors + +Using an existing MPI implementation +------------------------------------ + +To enable MPI support without :mod:`conda` installing its own MPI implementation +(e.g., to use the optimized implementation already provided on a cluster, etc.), +simply move :mod:`mpi4py` (and :mod:`mpi4py_fft`) below the :code:`pip` line +in :file:`environment.yml`:: + + ... + - pip: + - mpi4py + - mpi4py-fft + ... + +:mod:`pip`-installing :mod:`mpi4py` assumes that :code:`mpicc` is available +(check the output of :code:`which mpicc`). +See :mod:`mpi4py`'s +`instructions `__ for more +details. diff --git a/doc/license.rst b/doc/license.rst new file mode 100644 index 0000000..65e8756 --- /dev/null +++ b/doc/license.rst @@ -0,0 +1,24 @@ +Licensing +========= + +pystella is licensed to you under the MIT/X Consortium license: + +Copyright (C) 2019 Zachary J Weiner. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000..27f573b --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/doc/ref_codegen.rst b/doc/ref_codegen.rst new file mode 100644 index 0000000..24710b5 --- /dev/null +++ b/doc/ref_codegen.rst @@ -0,0 +1,23 @@ +.. currentmodule:: pystella + +Reference: Code Generation +========================== + +Kernel creation +--------------- + +.. automodule:: pystella.elementwise + +.. automodule:: pystella.stencil + +.. automodule:: pystella.reduction + +Fields +------ + +.. automodule:: pystella.field + +Sectors +------- + +.. automodule:: pystella.sectors diff --git a/doc/ref_fourier.rst b/doc/ref_fourier.rst new file mode 100644 index 0000000..5e38e09 --- /dev/null +++ b/doc/ref_fourier.rst @@ -0,0 +1,29 @@ +.. currentmodule:: pystella + +Reference: Fourier Space +======================== + +Fast Fourier transforms +----------------------- + +.. 
automodule:: pystella.fourier.dft + +Field power spectra +------------------- + +.. autoclass:: PowerSpectra + +Generating Gaussian-random fields +--------------------------------- + +.. automodule:: pystella.fourier.rayleigh + +Vector and tensor projections +----------------------------- + +.. automodule:: pystella.fourier.projectors + +Spectral solvers +---------------- + +.. automodule:: pystella.fourier.derivs diff --git a/doc/ref_multigrid.rst b/doc/ref_multigrid.rst new file mode 100644 index 0000000..1ce880d --- /dev/null +++ b/doc/ref_multigrid.rst @@ -0,0 +1,19 @@ +.. currentmodule:: pystella.multigrid + +Reference: Multigrid +==================== + +Multigrid schemes +----------------- + +.. automodule:: pystella.multigrid + +Relaxation methods +------------------ + +.. automodule:: pystella.multigrid.relax + +Grid transfer operations +------------------------ + +.. automodule:: pystella.multigrid.transfer \ No newline at end of file diff --git a/doc/ref_numerics.rst b/doc/ref_numerics.rst new file mode 100644 index 0000000..81b0470 --- /dev/null +++ b/doc/ref_numerics.rst @@ -0,0 +1,14 @@ +.. currentmodule:: pystella + +Reference: Numerical Methods +============================ + +Time stepping +------------- + +.. automodule:: pystella.step + +Spatial derivatives +------------------- + +.. automodule:: pystella.derivs diff --git a/doc/ref_other.rst b/doc/ref_other.rst new file mode 100644 index 0000000..bf315ab --- /dev/null +++ b/doc/ref_other.rst @@ -0,0 +1,23 @@ +.. currentmodule:: pystella + +Reference: Other Functionality +============================== + +MPI parallelization +------------------- + +.. autoclass:: DomainDecomposition + +Expansion +--------- + +.. autoclass:: Expansion + +Utilities +--------- + +.. autoclass:: FieldStatistics + +.. autoclass:: pystella.output.OutputFile + +.. 
autofunction:: choose_device_and_make_context diff --git a/doc/requirements.txt b/doc/requirements.txt new file mode 100644 index 0000000..a626eaa --- /dev/null +++ b/doc/requirements.txt @@ -0,0 +1 @@ +sphinx==2.2.0 \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..ca7bbaa --- /dev/null +++ b/environment.yml @@ -0,0 +1,21 @@ +name: pystella + +channels: + - conda-forge + +dependencies: + - pip + - numpy + - sympy + - h5py + - pyopencl + - pocl + - fftw + - clfft + - Cython + - mpi4py + - mpi4py-fft + - pip: + - -e git+ssh://git@github.com/inducer/pymbolic.git@master#egg=pymbolic + - -e git+ssh://git@github.com/inducer/loopy.git@master#egg=loo.py + - -e git+ssh://git@gitlab.com/zachjweiner/gpyfft.git#egg=gpyfft diff --git a/examples/codegen-tutorial.ipynb b/examples/codegen-tutorial.ipynb new file mode 100644 index 0000000..9b9686c --- /dev/null +++ b/examples/codegen-tutorial.ipynb @@ -0,0 +1,1634 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Code generation with `pystella` and `loopy`" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pyopencl as cl\n", + "import pyopencl.array as cla\n", + "import pyopencl.clrandom as clr\n", + "import loopy as lp\n", + "from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Four ways to an OpenCL kernel\n", + "\n", + "We're going to create (and run!) an OpenCL kernel that computes\n", + "\n", + "$$\n", + "a(\\mathbf{x}) = b(\\mathbf{x})^2 \\cdot c(\\mathbf{x}) + z\n", + "$$\n", + "\n", + "in four different ways.\n", + "\n", + "First, we'll generate data and expected results with `numpy`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "n = 64 # the grid size in each dimension\n", + "\n", + "b_h = np.random.rand(n, n, n).astype(np.float64)\n", + "c_h = np.random.rand(n, n, n).astype(np.float64)\n", + "z = np.array(3.2)\n", + "\n", + "a_true_h = b_h**2 * c_h + z" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. `pyopencl` arrays methods" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we need an OpenCL \"context\" (the umbrella construct for running programs with OpenCL) and a \"queue\" (to which kernels will be submitted to execute on a device).\n", + "\n", + "Check out `pyopencl`'s [docs](https://documen.tician.de/pyopencl/) for examples and details." + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "ctx = cl.create_some_context()\n", + "queue = cl.CommandQueue(ctx)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`pyopencl` has a very convenient `Array` construct, which emulates `numpy` arrays---but with memory residing on the device.\n", + "We'll copy the data to the device and try it out." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "b = cla.to_device(queue, b_h)\n", + "c = cla.to_device(queue, c_h)\n", + "a = cla.zeros_like(b)\n", + "a_true = cla.to_device(queue, a_true_h)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "a[:] = b**2 * c + z" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To compare results, compute the maximum of `a - a_true`:" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "difference = a - a_true" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8.881784197001252e-16" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.max(difference.get())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we had to call `difference.get()`, which returns a `numpy.ndarray` on the \"host\" (the CPU) with data copied from `difference` (on the GPU).\n", + "We can also use `pyopencl`'s `max` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(8.8817842e-16)" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cla.max(difference)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
OpenCL kernel generation with `loopy`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Refer to `loopy`'s [tutorial](https://documen.tician.de/loopy/tutorial.html) to get started.\n", + "\n", + "Let's create a kernel which computes the above for $i \\in [0, N_x)$, $j \\in [0, N_y)$, and $k \\in [0, N_z)$:" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "knl = lp.make_kernel(\n", + " \"{[i, j, k]: 0 <= i < Nx and 0 <= j < Ny and 0 <= k < Nz}\",\n", + " \"\"\"\n", + " a[i, j, k] = b[i, j, k]**2 * c[i, j, k] + z\n", + " \"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Inspect your kernel to see if it appears correct by printing it.\n", + "How did `make_kernel` interpret the un-indexed scalar variable `z`?" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "Nx: ValueArg, type: \n", + "Ny: ValueArg, type: \n", + "Nz: ValueArg, type: \n", + "a: type: , shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1) aspace: global\n", + "b: type: , shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1) aspace: global\n", + "c: type: , shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1) aspace: global\n", + "z: ValueArg, type: \n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "[Nx, Ny, Nz] -> { [i, j, k] : 0 <= i < Nx and 0 <= j < Ny and 0 <= k < Nz }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "i: None\n", + "j: None\n", + "k: None\n", 
+ "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + "for k, j, i\n", + " \u001b[36ma[i, j, k]\u001b[0m = \u001b[35mb[i, j, k]**2*c[i, j, k] + z\u001b[0m {id=\u001b[32minsn\u001b[0m}\n", + "end k, j, i\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "print(knl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now test our kernel by directly calling the `knl` we created above:" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "evt, _ = knl(queue, a=a, b=b, c=c, z=z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that `z` needs to be a `numpy.array` so that `loopy` can infer its datatype." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To compare results, compute the maximum of `a - a_true`:" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "difference = a - a_true" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8.881784197001252e-16" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.max(difference.get())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parallelization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, GPUs are parallel, and the kernel we just wrote isn't making use of any parallelism.\n", + "First, let's check what OpenCL code was produced by setting the kernel option `write_cl` to `True`:" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "knl = lp.set_options(knl, write_cl=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "If we run the kernel now, it will print OpenCL code:" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\n", + "__kernel \u001b[36mvoid\u001b[39;49;00m \u001b[32m__attribute__\u001b[39;49;00m ((reqd_work_group_size(\u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Nx, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Ny, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Nz, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ a, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ c, \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m z)\n", + "{\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m k = \u001b[34m0\u001b[39;49;00m; k <= -\u001b[34m1\u001b[39;49;00m + Nz; ++k)\n", + " \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + Nx >= \u001b[34m0\u001b[39;49;00m && -\u001b[34m1\u001b[39;49;00m + Ny >= \u001b[34m0\u001b[39;49;00m)\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m j = \u001b[34m0\u001b[39;49;00m; j <= -\u001b[34m1\u001b[39;49;00m 
+ Ny; ++j)\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m i = \u001b[34m0\u001b[39;49;00m; i <= -\u001b[34m1\u001b[39;49;00m + Nx; ++i)\n", + " a[Nz * Ny * i + Nz * j + k] = c[Nz * Ny * i + Nz * j + k] * b[Nz * Ny * i + Nz * j + k] * b[Nz * Ny * i + Nz * j + k] + z;\n", + "}\n", + "\n" + ] + } + ], + "source": [ + "evt, _ = knl(queue, a=a, b=b, c=c, z=z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It looks correct, array indexing and all.\n", + "But that's a lot of sequential loops!\n", + "`loopy` enables *code transformations* that (aim to) optimize the performance of a given kernel.\n", + "For instance, mapping the `k` index to the \"0\" index of the local and global OpenCL thread dimensions is accomplished via `loopy.split_iname`:" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "knl = lp.split_iname(knl, \"k\", 32, outer_tag=\"g.0\", inner_tag=\"l.0\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what this did to the kernel:" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "Nx: ValueArg, type: \n", + "Ny: ValueArg, type: \n", + "Nz: ValueArg, type: \n", + "a: type: , shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1) aspace: global\n", + "b: type: , shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1) aspace: global\n", + "c: type: , shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1) aspace: global\n", + "z: ValueArg, type: \n", + "---------------------------------------------------------------------------\n", + 
"DOMAINS:\n", + "[Nx, Ny, Nz] -> { [i, j, k_outer, k_inner] : 0 <= i < Nx and 0 <= j < Ny and k_inner >= 0 and -32k_outer <= k_inner <= 31 and k_inner < Nz - 32k_outer }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "i: None\n", + "j: None\n", + "k_inner: l.0\n", + "k_outer: g.0\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + "for j, k_inner, k_outer, i\n", + " \u001b[36ma[i, j, k_inner + k_outer*32]\u001b[0m = \u001b[35mb[i, j, k_inner + k_outer*32]**2*c[i, j, k_inner + k_outer*32] + z\u001b[0m {id=\u001b[32minsn\u001b[0m}\n", + "end j, k_inner, k_outer, i\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "print(knl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above splits the loop over the \"iname\" `k` into (a yet-undetermined number of) blocks of 32 threads each." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The \"iname\" (index name) `k` is gone, repalced by the combination `k_inner + k_outer * 32`.\n", + "Observe also that the \"implementation\" of these new inames has been tagged to map to axes of global and local parallelization (as we specified).\n", + "If we run the kernel now (enabling `write_cl` again), we see that the sequential loop over `k` is gone, and the indexing of `k` has been replaced by `32 * gid(0) + lid(0)`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\n", + "__kernel \u001b[36mvoid\u001b[39;49;00m \u001b[32m__attribute__\u001b[39;49;00m ((reqd_work_group_size(\u001b[34m32\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Nx, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Ny, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Nz, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ a, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ c, \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m z)\n", + "{\n", + " \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + Nx >= \u001b[34m0\u001b[39;49;00m && -\u001b[34m1\u001b[39;49;00m + Ny >= \u001b[34m0\u001b[39;49;00m && -\u001b[34m1\u001b[39;49;00m + -\u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + -\u001b[34m1\u001b[39;49;00m * lid(\u001b[34m0\u001b[39;49;00m) + Nz >= \u001b[34m0\u001b[39;49;00m)\n", + " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m j = \u001b[34m0\u001b[39;49;00m; j <= -\u001b[34m1\u001b[39;49;00m + Ny; ++j)\n", 
+ " \u001b[34mfor\u001b[39;49;00m (\u001b[36mint\u001b[39;49;00m i = \u001b[34m0\u001b[39;49;00m; i <= -\u001b[34m1\u001b[39;49;00m + Nx; ++i)\n", + " a[Nz * Ny * i + Nz * j + \u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + lid(\u001b[34m0\u001b[39;49;00m)] = c[Nz * Ny * i + Nz * j + \u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + lid(\u001b[34m0\u001b[39;49;00m)] * b[Nz * Ny * i + Nz * j + \u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + lid(\u001b[34m0\u001b[39;49;00m)] * b[Nz * Ny * i + Nz * j + \u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + lid(\u001b[34m0\u001b[39;49;00m)] + z;\n", + "}\n", + "\n" + ] + } + ], + "source": [ + "knl = lp.set_options(knl, write_cl=True)\n", + "evt, _ = knl(queue, a=a, b=b, c=c, z=z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that no loops over `k_inner` nor `k_outer` appear. They have been mapped to the \"hardware\" axes of parallelization: the kernel implicitly runs over a bunch of work groups (one for each value of `gid(0)`), each with 32 work items (each with their own index `lid(0)`)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And the result is still correct!" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8.881784197001252e-16" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "difference = a - a_true\n", + "np.max(difference.get())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can achieve more parallelism by \"tagging\" `j` and `i` as, say, global indices 1 and 2." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "knl = lp.tag_inames(knl, {'j': 'g.1', 'i': 'g.2'})" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine lid(N) ((int) get_local_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mdefine gid(N) ((int) get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\n", + "__kernel \u001b[36mvoid\u001b[39;49;00m \u001b[32m__attribute__\u001b[39;49;00m ((reqd_work_group_size(\u001b[34m32\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Nx, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Ny, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Nz, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ a, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ c, \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m z)\n", + "{\n", + " \u001b[34mif\u001b[39;49;00m (-\u001b[34m1\u001b[39;49;00m + -\u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + -\u001b[34m1\u001b[39;49;00m * lid(\u001b[34m0\u001b[39;49;00m) + Nz >= \u001b[34m0\u001b[39;49;00m)\n", + " a[Nz * Ny * gid(\u001b[34m2\u001b[39;49;00m) + Nz * gid(\u001b[34m1\u001b[39;49;00m) + 
 "If we are *sure* that this won't happen (namely, that `Nz` is divisible by 32), we can add this as an assumption:"
get_group_id(N))\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mif __OPENCL_C_VERSION__ < 120\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mpragma OPENCL EXTENSION cl_khr_fp64: enable\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\u001b[36m#\u001b[39;49;00m\u001b[36mendif\u001b[39;49;00m\u001b[36m\u001b[39;49;00m\n", + "\n", + "__kernel \u001b[36mvoid\u001b[39;49;00m \u001b[32m__attribute__\u001b[39;49;00m ((reqd_work_group_size(\u001b[34m32\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m, \u001b[34m1\u001b[39;49;00m))) loopy_kernel(\u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Nx, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Ny, \u001b[36mint\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m Nz, __global \u001b[36mdouble\u001b[39;49;00m *__restrict__ a, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ b, __global \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m *__restrict__ c, \u001b[36mdouble\u001b[39;49;00m \u001b[34mconst\u001b[39;49;00m z)\n", + "{\n", + " a[Nz * Ny * gid(\u001b[34m2\u001b[39;49;00m) + Nz * gid(\u001b[34m1\u001b[39;49;00m) + \u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + lid(\u001b[34m0\u001b[39;49;00m)] = c[Nz * Ny * gid(\u001b[34m2\u001b[39;49;00m) + Nz * gid(\u001b[34m1\u001b[39;49;00m) + \u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + lid(\u001b[34m0\u001b[39;49;00m)] * b[Nz * Ny * gid(\u001b[34m2\u001b[39;49;00m) + Nz * gid(\u001b[34m1\u001b[39;49;00m) + \u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + lid(\u001b[34m0\u001b[39;49;00m)] * b[Nz * Ny * gid(\u001b[34m2\u001b[39;49;00m) + Nz * gid(\u001b[34m1\u001b[39;49;00m) + \u001b[34m32\u001b[39;49;00m * gid(\u001b[34m0\u001b[39;49;00m) + lid(\u001b[34m0\u001b[39;49;00m)] + z;\n", + "}\n", + "\n" + ] + } + ], + "source": [ + "knl = lp.set_options(knl, write_cl=True)\n", + 
"evt, (x,) = knl(queue, a=a, b=b, c=c, z=z)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8.881784197001252e-16" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "difference = a - a_true\n", + "np.max(difference.get())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Symbolic representation of code" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's inspect the actual instruction `knl` is executing as represented by `loopy` kernel objects:" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Assignment(groups=frozenset(), within_inames=frozenset({'j', 'k_inner', 'k_outer', 'i'}), expression=Sum((Product((Power(Subscript(..., (..., ..., ...)), 2), Subscript(Variable('c'), (Variable('i'), Variable('j'), Sum((..., ...)))))), Variable('z'))), priority=0, depends_on=frozenset(), id='insn', within_inames_is_final=False, temp_var_type=Optional(), depends_on_is_final=False, conflicts_with_groups=frozenset(), boostable=None, atomicity=(), no_sync_with=frozenset(), assignee=Subscript(Variable('a'), (Variable('i'), Variable('j'), Sum((Variable('k_inner'), Product((Variable('k_outer'), 32)))))), tags=frozenset(), boostable_into=None, predicates=frozenset())]" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knl.instructions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This has quite a lot of details - but we're interested in the \"assignee\" and the \"expression\" of the first (and only) instruction:" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Subscript(Variable('a'), (Variable('i'), Variable('j'), 
Sum((Variable('k_inner'), Product((Variable('k_outer'), 32))))))" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knl.instructions[0].assignee" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Sum((Product((Power(Subscript(..., (..., ..., ...)), 2), Subscript(Variable('c'), (Variable('i'), Variable('j'), Sum((..., ...)))))), Variable('z')))" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knl.instructions[0].expression" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an *expression tree*. We can actually see what's going on if we `print(assignee, '=', expression)`:" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a[i, j, k_inner + k_outer*32] = b[i, j, k_inner + k_outer*32]**2*c[i, j, k_inner + k_outer*32] + z\n" + ] + } + ], + "source": [ + "print(knl.instructions[0].assignee, '=', knl.instructions[0].expression)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is exactly the statement that appears if we print `knl` itself:" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "Nx: ValueArg, type: \n", + "Ny: ValueArg, type: \n", + "Nz: ValueArg, type: \n", + "a: type: , shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1) aspace: global\n", + "b: type: , shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1) aspace: global\n", 
+ "c: type: , shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1) aspace: global\n", + "z: ValueArg, type: \n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "[Nx, Ny, Nz] -> { [i, j, k_outer, k_inner] : 0 <= i < Nx and 0 <= j < Ny and k_inner >= 0 and -32k_outer <= k_inner <= 31 and k_inner < Nz - 32k_outer }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "i: g.2\n", + "j: g.1\n", + "k_inner: l.0\n", + "k_outer: g.0\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + "for j, k_inner, k_outer, i\n", + " \u001b[36ma[i, j, k_inner + k_outer*32]\u001b[0m = \u001b[35mb[i, j, k_inner + k_outer*32]**2*c[i, j, k_inner + k_outer*32] + z\u001b[0m {id=\u001b[32minsn\u001b[0m}\n", + "end j, k_inner, k_outer, i\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "print(knl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's reproduce this instruction using `pymbolic`. 
First we need to import all the \"primitive\" objects:" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "import pymbolic.primitives as p" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now create some named \"Variables\" (prepending their name with an underscore so they don't overwrite our `pyopencl` arrays):" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "_a = p.Variable('a')\n", + "_b = p.Variable('b')\n", + "_c = p.Variable('c')\n", + "_z = p.Variable('z')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also need some index variables:" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "i = p.Variable('i')\n", + "j = p.Variable('j')\n", + "k = p.Variable('k')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we index (or \"subscript\") a `Variable`, we get a `Subscript` object:" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Subscript(Variable('a'), (Variable('i'), Variable('j'), Variable('k')))" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "_a[i, j, k]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This matches the `assignee` of the instruction above. 
Let's try the `expression`:" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Sum((Product((Power(Subscript(..., (..., ..., ...)), 2), Subscript(Variable('c'), (Variable('i'), Variable('j'), Variable('k'))))), Variable('z')))" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "_b[i, j, k]**2 * _c[i, j, k] + _z" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b[i, j, k]**2*c[i, j, k] + z\n" + ] + } + ], + "source": [ + "print(_b[i, j, k]**2 * _c[i, j, k] + _z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Looks good." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The lesson here is that `pymbolic` provides a symbolic way to generate code.\n", + "Rather than inputting strings of instructions to `loopy.make_kernel` (which, as we saw above, are parsed to `pymbolic` expressions by `loopy` behind the scenes!), we can work with the symbolic code directly.\n", + "This unlocks a lot of potential to actually use python as a scripting language to generate the code (which `loopy` uses to subsequently generate OpenCL code).\n", + "`pymbolic` can be thought of as a very simple computer algebra system (it can take derivatives, for instance), but geared toward manipulating and generating code." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. `pystella.ElementWiseMap`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see how `pystella` provides a simpler interface to `loopy` to turn `pymbolic` expressions into kernels.\n", + "The fundamental representation is python's dictionary type---key-value pairs which correspond to assignee-expression pairs." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we'll recreate our same kernel again." + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "import pystella as ps" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "map_dict = {\n", + " _a[i, j, k]: _b[i, j, k]**2 * _c[i, j, k] + _z\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "ewmap = ps.ElementWiseMap(map_dict, dtype='float64', h=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "Nx: ValueArg, type: np:dtype('int64')\n", + "Ny: ValueArg, type: np:dtype('int64')\n", + "Nz: ValueArg, type: np:dtype('int64')\n", + "a: type: np:dtype('float64'), shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1), offset: aspace: global\n", + "b: type: np:dtype('float64'), shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1), offset: aspace: global\n", + "c: type: np:dtype('float64'), shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1), offset: aspace: global\n", + "z: ValueArg, type: np:dtype('float64')\n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "[Nx, Ny, Nz] -> { [k_outer, k_inner, j_outer, j_inner, i_outer, i_inner] : i_inner = 0 and k_inner >= 0 and -16k_outer <= k_inner <= 15 and k_inner < Nz - 16k_outer and j_inner >= 0 and -4j_outer <= j_inner <= 3 and j_inner < Ny - 4j_outer and 0 <= i_outer < Nx }\n", + 
"---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "i_inner: unr\n", + "i_outer: g.2\n", + "j_inner: l.1\n", + "j_outer: g.1\n", + "k_inner: l.0\n", + "k_outer: g.0\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + "for i_inner, j_outer, i_outer, j_inner, k_outer, k_inner\n", + " \u001b[36ma[i_inner + i_outer, j_inner + j_outer*4, k_inner + k_outer*16]\u001b[0m = \u001b[35mb[i_inner + i_outer, j_inner + j_outer*4, k_inner + k_outer*16]**2*c[i_inner + i_outer, j_inner + j_outer*4, k_inner + k_outer*16] + z\u001b[0m {id=\u001b[32minsn\u001b[0m}\n", + "end i_inner, j_outer, i_outer, j_inner, k_outer, k_inner\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "print(ewmap.knl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's the same kernel! Already parallelized---`ElementWiseMap` implements a default parallelization that works well for these types of operations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check the results:" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [], + "source": [ + "evt, _ = ewmap(queue, a=a, b=b, c=c, z=z)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8.881784197001252e-16" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "difference = a - a_true\n", + "np.max(difference.get())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. 
Using `pystella.Field`'s as input to `pystella.ElementWiseMap`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`pystella.Field`'s can make our life even easier.\n", + "Constantly indexing with `[i, j, k]` can get pretty annoying, and can be automated with `pymbolic`'s mapping methods." + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "_a = ps.Field('a')\n", + "_b = ps.Field('b')\n", + "_c = ps.Field('c')\n", + "_z = p.Variable('z')\n", + "\n", + "map_dict = {\n", + " _a: _b**2 * _c + _z\n", + "}\n", + "\n", + "ewmap = ps.ElementWiseMap(map_dict, dtype='float64', h=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "Nx: ValueArg, type: np:dtype('int64')\n", + "Ny: ValueArg, type: np:dtype('int64')\n", + "Nz: ValueArg, type: np:dtype('int64')\n", + "a: type: np:dtype('float64'), shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1), offset: aspace: global\n", + "b: type: np:dtype('float64'), shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1), offset: aspace: global\n", + "c: type: np:dtype('float64'), shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1), offset: aspace: global\n", + "z: ValueArg, type: np:dtype('float64')\n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "[Nx, Ny, Nz] -> { [k_outer, k_inner, j_outer, j_inner, i_outer, i_inner] : i_inner = 0 and k_inner >= 0 and -16k_outer <= k_inner <= 15 and k_inner < Nz - 16k_outer and j_inner >= 0 and -4j_outer <= j_inner <= 3 and j_inner < Ny - 4j_outer and 0 <= i_outer < Nx }\n", + 
"---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "i_inner: unr\n", + "i_outer: g.2\n", + "j_inner: l.1\n", + "j_outer: g.1\n", + "k_inner: l.0\n", + "k_outer: g.0\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + "for i_inner, j_outer, i_outer, j_inner, k_outer, k_inner\n", + " \u001b[36ma[i_inner + i_outer, j_inner + j_outer*4, k_inner + k_outer*16]\u001b[0m = \u001b[35mb[i_inner + i_outer, j_inner + j_outer*4, k_inner + k_outer*16]**2*c[i_inner + i_outer, j_inner + j_outer*4, k_inner + k_outer*16] + z\u001b[0m {id=\u001b[32minsn\u001b[0m}\n", + "end i_inner, j_outer, i_outer, j_inner, k_outer, k_inner\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "print(ewmap.knl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We made no mention of indices or subscripts, yet the kernels are identical." + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [], + "source": [ + "evt, _ = ewmap(queue, a=a, b=b, c=c, z=z)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8.881784197001252e-16" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "difference = a - a_true\n", + "np.max(difference.get())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To further illustrate why `pystella.Field`'s are useful, consider the (extremely common) case where arrays are padded in each direction.\n", + "This is implemented by passing a value for `offset`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [], + "source": [ + "_a = ps.Field('a', offset='h')\n", + "_b = ps.Field('b', offset='h')\n", + "_c = ps.Field('c', offset='h')\n", + "_z = p.Variable('z')\n", + "\n", + "map_dict = {\n", + " _a: _b**2 * _c + _z\n", + "}\n", + "\n", + "ewmap = ps.ElementWiseMap(map_dict, dtype='float64', h=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "Nx: ValueArg, type: np:dtype('int64')\n", + "Ny: ValueArg, type: np:dtype('int64')\n", + "Nz: ValueArg, type: np:dtype('int64')\n", + "a: type: np:dtype('float64'), shape: (Nx + 1, Ny + 1, Nz + 1), dim_tags: (N2:stride:(Nz + 1)*(Ny + 1), N1:stride:Nz + 1, N0:stride:1), offset: aspace: global\n", + "b: type: np:dtype('float64'), shape: (Nx + 1, Ny + 1, Nz + 1), dim_tags: (N2:stride:(Nz + 1)*(Ny + 1), N1:stride:Nz + 1, N0:stride:1), offset: aspace: global\n", + "c: type: np:dtype('float64'), shape: (Nx + 1, Ny + 1, Nz + 1), dim_tags: (N2:stride:(Nz + 1)*(Ny + 1), N1:stride:Nz + 1, N0:stride:1), offset: aspace: global\n", + "z: ValueArg, type: np:dtype('float64')\n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "[Nx, Ny, Nz] -> { [k_outer, k_inner, j_outer, j_inner, i_outer, i_inner] : i_inner = 0 and k_inner >= 0 and -16k_outer <= k_inner <= 15 and k_inner < Nz - 16k_outer and j_inner >= 0 and -4j_outer <= j_inner <= 3 and j_inner < Ny - 4j_outer and 0 <= i_outer < Nx }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "i_inner: unr\n", + "i_outer: g.2\n", + "j_inner: l.1\n", + "j_outer: 
g.1\n", + "k_inner: l.0\n", + "k_outer: g.0\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + "for i_inner, j_outer, i_outer, j_inner, k_outer, k_inner\n", + " \u001b[36ma[1 + i_inner + i_outer, 1 + j_inner + j_outer*4, 1 + k_inner + k_outer*16]\u001b[0m = \u001b[35mb[1 + i_inner + i_outer, 1 + j_inner + j_outer*4, 1 + k_inner + k_outer*16]**2*c[1 + i_inner + i_outer, 1 + j_inner + j_outer*4, 1 + k_inner + k_outer*16] + z\u001b[0m {id=\u001b[32minsn\u001b[0m}\n", + "end i_inner, j_outer, i_outer, j_inner, k_outer, k_inner\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "print(ewmap.knl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This would get quite cumbersome to type out manually, and it's easy to forget which arrays should be padded.\n", + "From experience, it can be difficult to see errors in array indexing." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Indexer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Behind the scenes, `ElementWiseMap` is calling `Indexer`:" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "f\n", + "f[i + h, j + 3, k + 2]\n" + ] + } + ], + "source": [ + "_f = ps.Field('f', offset=('h', 3, 2))\n", + "print(_f)\n", + "print(ps.Indexer(_f))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exercise: computing spatial gradients with `pystella.Field` and the `pystella.Stencil` kernel generator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`Field`'s also have a `shift` method, which does what it sounds like:" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "a[i + h + 1, j + h, k + h]\n" + ] + } + ], + "source": [ + "print(ps.Indexer(_a.shift((1, 0, 0))))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For this type of kernel, `pystella.Stencil` provides good parallelization (by allowing arrays to be *prefetched* into so-called \"shared\" memory)." + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "f = ps.Field('f', offset=1)\n", + "\n", + "dfdx = ps.Field('dfdx', offset=0)\n", + "dfdy = ps.Field('dfdy', offset=0)\n", + "dfdz = ps.Field('dfdz', offset=0)\n", + "\n", + "dx = p.Variable('dx')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fill in `map_dict` below to compute the second-order centered-difference approximation to the gradient of `f`:" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [], + "source": [ + "map_dict = {\n", + " dfdx: (f.shift((1, 0, 0)) - f.shift((-1, 0, 0))) / 2 / dx,\n", + " dfdy: (f.shift((0, 1, 0)) - f.shift((0, -1, 0))) / 2 / dx,\n", + " dfdz: (f.shift((0, 0, 1)) - f.shift((0, 0, -1))) / 2 / dx,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "stencil = ps.Stencil(map_dict, prefetch_args=['f'], h=1, dtype='float64')" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "Nx: ValueArg, type: np:dtype('int64')\n", + "Ny: ValueArg, type: np:dtype('int64')\n", + "Nz: ValueArg, type: np:dtype('int64')\n", + "dfdx: type: np:dtype('float64'), shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1), offset: aspace: 
global\n", + "dfdy: type: np:dtype('float64'), shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1), offset: aspace: global\n", + "dfdz: type: np:dtype('float64'), shape: (Nx, Ny, Nz), dim_tags: (N2:stride:Nz*Ny, N1:stride:Nz, N0:stride:1), offset: aspace: global\n", + "dx: ValueArg, type: np:dtype('float64')\n", + "f: type: np:dtype('float64'), shape: (2 + Nx, 2 + Ny, 2 + Nz), dim_tags: (N2:stride:(2 + Nz)*(2 + Ny), N1:stride:2 + Nz, N0:stride:1), offset: aspace: global\n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "[Nx, Ny, Nz] -> { [k_outer, k_inner, j_outer, j_inner, i_outer, i_inner, f_dim_0, f_dim_1, f_dim_2] : k_outer >= 0 and 0 <= k_inner <= 7 and k_inner < Nz - 8k_outer and j_outer >= 0 and 0 <= j_inner <= 7 and j_inner < Ny - 8j_outer and i_outer >= 0 and 0 <= i_inner <= 7 and i_inner < Nx - 8i_outer and 0 <= f_dim_0 <= 9 and f_dim_0 <= 1 + Nx - 8i_outer and 0 <= f_dim_1 <= 9 and f_dim_1 <= 1 + Ny - 8j_outer and 0 <= f_dim_2 <= 9 and f_dim_2 <= 1 + Nz - 8k_outer }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "f_dim_0: l.2\n", + "f_dim_1: l.1\n", + "f_dim_2: l.0\n", + "i_inner: l.2\n", + "i_outer: g.2\n", + "j_inner: l.1\n", + "j_outer: g.1\n", + "k_inner: l.0\n", + "k_outer: g.0\n", + "---------------------------------------------------------------------------\n", + "TEMPORARIES:\n", + "_f: type: np:dtype('float64'), shape: (f_dim_0:10, f_dim_1:10, f_dim_2:10), dim_tags: (N2:stride:100, N1:stride:10, N0:stride:1) scope:auto\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + " for j_outer, k_outer, i_outer, f_dim_1, f_dim_0, f_dim_2\n", + "↱ \u001b[36m_f[f_dim_0, f_dim_1, f_dim_2]\u001b[0m = \u001b[35mf[f_dim_0 + 8*i_outer, f_dim_1 + 8*j_outer, f_dim_2 + 8*k_outer]\u001b[0m {id=\u001b[32mf_fetch_rule\u001b[0m}\n", + "│ end f_dim_1, f_dim_0, 
f_dim_2\n", + "│ for i_inner, k_inner, j_inner\n", + "├↱ \u001b[36mdfdx[i_inner + i_outer*8, j_inner + j_outer*8, k_inner + k_outer*8]\u001b[0m = \u001b[35m((_f[2 + i_inner, 1 + j_inner, 1 + k_inner] + (-1)*_f[i_inner, 1 + j_inner, 1 + k_inner]) / 2) / dx\u001b[0m {id=\u001b[32minsn\u001b[0m}\n", + "├└↱ \u001b[36mdfdy[i_inner + i_outer*8, j_inner + j_outer*8, k_inner + k_outer*8]\u001b[0m = \u001b[35m((_f[1 + i_inner, 2 + j_inner, 1 + k_inner] + (-1)*_f[1 + i_inner, j_inner, 1 + k_inner]) / 2) / dx\u001b[0m {id=\u001b[32minsn_0\u001b[0m}\n", + "└ └ \u001b[36mdfdz[i_inner + i_outer*8, j_inner + j_outer*8, k_inner + k_outer*8]\u001b[0m = \u001b[35m((_f[1 + i_inner, 1 + j_inner, 2 + k_inner] + (-1)*_f[1 + i_inner, 1 + j_inner, k_inner]) / 2) / dx\u001b[0m {id=\u001b[32minsn_1\u001b[0m}\n", + " end j_outer, k_outer, i_outer, i_inner, k_inner, j_inner\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + "source": [ + "print(stencil.knl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also do something more complicated by inputting a `tmp_dict`, which computes temporary values (that don't get stored in global arrays) before executing the assignments specified by `map_dict`:" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "f = ps.Field('f', offset=1)\n", + "g = ps.Field('g', offset=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [], + "source": [ + "tmp = p.Variable('tmp')" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "tmp_dict = {}\n", + "for i in range(3):\n", + " shift = [0, 0, 0]\n", + " shift[i] = 1\n", + " expr = f.shift(tuple(shift))\n", + " shift[i] = - 1\n", + " expr += f.shift(tuple(shift))\n", + " tmp_dict[tmp[i]] = expr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"Let's check what we just did:" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tmp[0] = f[i + 1 + 1, j + 1, k + 1] + f[i + 1 + -1, j + 1, k + 1]\n", + "tmp[1] = f[i + 1, j + 1 + 1, k + 1] + f[i + 1, j + 1 + -1, k + 1]\n", + "tmp[2] = f[i + 1, j + 1, k + 1 + 1] + f[i + 1, j + 1, k + 1 + -1]\n" + ] + } + ], + "source": [ + "for key, value in tmp_dict.items():\n", + " print(key, '=', ps.Indexer(value))" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [], + "source": [ + "map_dict = {\n", + " g: tmp[0] * tmp[1] * tmp[2]\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---------------------------------------------------------------------------\n", + "KERNEL: loopy_kernel\n", + "---------------------------------------------------------------------------\n", + "ARGUMENTS:\n", + "Nx: ValueArg, type: np:dtype('int64')\n", + "Ny: ValueArg, type: np:dtype('int64')\n", + "Nz: ValueArg, type: np:dtype('int64')\n", + "f: type: np:dtype('float64'), shape: (2 + Nx, 2 + Ny, 2 + Nz), dim_tags: (N2:stride:(2 + Nz)*(2 + Ny), N1:stride:2 + Nz, N0:stride:1), offset: aspace: global\n", + "g: type: np:dtype('float64'), shape: (1 + Nx, 1 + Ny, 1 + Nz), dim_tags: (N2:stride:(1 + Nz)*(1 + Ny), N1:stride:1 + Nz, N0:stride:1), offset: aspace: global\n", + "---------------------------------------------------------------------------\n", + "DOMAINS:\n", + "[Nx, Ny, Nz] -> { [k_outer, k_inner, j_outer, j_inner, i_outer, i_inner, f_dim_0, f_dim_1, f_dim_2] : k_outer >= 0 and 0 <= k_inner <= 7 and k_inner < Nz - 8k_outer and j_outer >= 0 and 0 <= j_inner <= 7 and j_inner < Ny - 8j_outer and i_outer >= 0 and 0 <= i_inner <= 7 and i_inner < Nx - 8i_outer and 0 <= f_dim_0 <= 9 and f_dim_0 <= 1 + Nx - 8i_outer and 0 <= f_dim_1 
<= 9 and f_dim_1 <= 1 + Ny - 8j_outer and 0 <= f_dim_2 <= 9 and f_dim_2 <= 1 + Nz - 8k_outer }\n", + "---------------------------------------------------------------------------\n", + "INAME IMPLEMENTATION TAGS:\n", + "f_dim_0: l.2\n", + "f_dim_1: l.1\n", + "f_dim_2: l.0\n", + "i_inner: l.2\n", + "i_outer: g.2\n", + "j_inner: l.1\n", + "j_outer: g.1\n", + "k_inner: l.0\n", + "k_outer: g.0\n", + "---------------------------------------------------------------------------\n", + "TEMPORARIES:\n", + "_f: type: np:dtype('float64'), shape: (f_dim_0:10, f_dim_1:10, f_dim_2:10), dim_tags: (N2:stride:100, N1:stride:10, N0:stride:1) scope:auto\n", + "tmp: type: , shape: (3), dim_tags: (N0:stride:1) scope:auto\n", + "---------------------------------------------------------------------------\n", + "INSTRUCTIONS:\n", + " for j_outer, k_outer, i_outer, f_dim_1, f_dim_0, f_dim_2\n", + "↱ \u001b[36m_f[f_dim_0, f_dim_1, f_dim_2]\u001b[0m = \u001b[35mf[f_dim_0 + 8*i_outer, f_dim_1 + 8*j_outer, f_dim_2 + 8*k_outer]\u001b[0m {id=\u001b[32mf_fetch_rule\u001b[0m}\n", + "│ end f_dim_1, f_dim_0, f_dim_2\n", + "│ for i_inner, k_inner, j_inner\n", + "├↱ \u001b[36mtmp[0]\u001b[0m = \u001b[35m_f[2 + i_inner, 1 + j_inner, 1 + k_inner] + _f[i_inner, 1 + j_inner, 1 + k_inner]\u001b[0m {id=\u001b[32minsn\u001b[0m}\n", + "├└↱ \u001b[36mtmp[1]\u001b[0m = \u001b[35m_f[1 + i_inner, 2 + j_inner, 1 + k_inner] + _f[1 + i_inner, j_inner, 1 + k_inner]\u001b[0m {id=\u001b[32minsn_0\u001b[0m}\n", + "└↱└ \u001b[36mtmp[2]\u001b[0m = \u001b[35m_f[1 + i_inner, 1 + j_inner, 2 + k_inner] + _f[1 + i_inner, 1 + j_inner, k_inner]\u001b[0m {id=\u001b[32minsn_1\u001b[0m}\n", + " └ \u001b[36mg[1 + i_inner + i_outer*8, 1 + j_inner + j_outer*8, 1 + k_inner + k_outer*8]\u001b[0m = \u001b[35mtmp[0]*tmp[1]*tmp[2]\u001b[0m {id=\u001b[32minsn_2\u001b[0m}\n", + " end j_outer, k_outer, i_outer, i_inner, k_inner, j_inner\n", + "---------------------------------------------------------------------------\n" + ] + } + ], + 
"source": [ + "stencil = ps.Stencil(map_dict, tmp_dict=tmp_dict, prefetch_args=['f'], h=1, dtype='float64')\n", + "\n", + "print(stencil.knl)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": false, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": false, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/phi_chi.py b/examples/phi_chi.py new file mode 100644 index 0000000..1a69f8d --- /dev/null +++ b/examples/phi_chi.py @@ -0,0 +1,182 @@ +import numpy as np +import pyopencl as cl +import pyopencl.array as cla +import pystella as ps +# pylint: disable=no-member + +# set parameters +grid_shape = (128, 128, 128) +proc_shape = (1, 1, 1) +rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape)) +grid_size = np.product(grid_shape) + +h = 2 +nscalars = 2 +pencil_shape = tuple(ni + 2 * h for ni in rank_shape) + +box_dim = (5, 5, 5) +volume = np.product(box_dim) +dx = tuple(Li / Ni for Li, Ni in zip(box_dim, grid_shape)) +dk = tuple(2 * np.pi / Li for Li in box_dim) +kappa = 1/10 +dt = kappa * min(dx) + +dtype = np.float64 +nscalars = 2 +mpl = 1 # change to np.sqrt(8 * np.pi) for reduced Planck mass units +mphi = 1.e-6 * mpl # units of mpl +mchi = [0, 0] # units of mpl +gsq = [2.5e-7 / mphi**2, 1.25e-7 / mphi**2] +f0 = [.193 * mpl, 0, 0] # units of mpl +df0 = [-.142231 * mpl, 
0, 0] # units of mpl +end_time = 1 +end_scale_factor = 20 +Stepper = ps.RungeKutta4 + +ctx = ps.choose_device_and_make_context() +queue = cl.CommandQueue(ctx) + +decomp = ps.DomainDecomposition(proc_shape, h, rank_shape) +derivs = ps.GradientLaplacian(decomp, h, dx) + + +def potential(f): + phi, chi = f[0], [f[i] for i in range(1, nscalars)] + return 1/2 * phi**2 \ + + 1/2 * sum([mchi[i]**2 * chi[i]**2 for i, _ in enumerate(chi)]) \ + + 1/2 * phi**2 * sum([gsq[i] * chi[i]**2 for i, _ in enumerate(chi)]) + + +scalar_sector = ps.ScalarSector(nscalars, potential=potential) + +# create energy computation function +from pystella.sectors import get_rho_and_p +reduce_energy = ps.Reduction(decomp, scalar_sector, h=h, + rank_shape=rank_shape, grid_size=grid_size, + callback=get_rho_and_p) + + +def compute_energy(f_s, df_s, lap_f, a): + derivs(queue, fx=f_s, lap=lap_f) + + return reduce_energy(queue, f=f_s, dfdt=df_s, lap_f=lap_f, a=np.array(a)) + + +stepper = Stepper(scalar_sector, h=h, dt=dt) + +# create output function +if decomp.rank == 0: + from pystella.output import OutputFile + out = OutputFile(ctx=ctx, runfile=__file__) +else: + out = None +statistics = ps.FieldStatistics(decomp, h, rank_shape=rank_shape, + grid_size=grid_size) +fft = ps.DFT(decomp, ctx, queue, grid_shape, dtype) +spectra = ps.PowerSpectra(decomp, fft, dk, volume) +projector = ps.Projector(fft, h) + + +def output(step_count, t, f, dfdt, lap_f, energy, expand): + if step_count % 2 == 0: + f_stats = statistics(f[0]) + f_stats['mean'] /= mpl + f_stats['variance'] /= mpl**2 + + true_energy = {} + for key, val in energy.items(): + true_energy[key] = val / expand.a[0]**2 / mpl**2 + + if decomp.rank == 0: + out.output('energy', t=t, a=expand.a[0], + adot=expand.adot[0]/expand.a[0], + hubble=expand.hubble[0]/expand.a[0], + **true_energy, + eos=energy['pressure']/energy['total'], + constraint=expand.constraint(energy['total']) + ) + + out.output('statistics/f', t=t, a=expand.a[0], **f_stats) + + if 
expand.a[0] / output.a_last_spec >= 1.02: + output.a_last_spec = expand.a[0] + + scalar_spectra = spectra(f[0]) + + if decomp.rank == 0: + out.output('spectra', t=t, a=expand.a[0], + spectra=scalar_spectra) + + +output.a_last_spec = .1 + +# create cl arrays +f = cla.empty(queue, (3, nscalars,)+pencil_shape, dtype=dtype) +dfdt = cla.empty(queue, (3, nscalars,)+pencil_shape, dtype=dtype) +lap_f = cla.empty(queue, (nscalars,)+rank_shape, dtype=dtype) + +# set field means +for i in range(nscalars): + f[0, i] = f0[i] + dfdt[0, i] = df0[i] + +# compute energy of background fields and initialize expansion +energy = compute_energy(f[0], dfdt[0], lap_f, 1.) +expand = ps.Expansion(energy['total'], Stepper, mpl=mpl) + +# compute hubble correction to scalar field effective mass +addot = expand.addot_friedmann_2(expand.a[0], energy['total'], energy['pressure']) +hubbleCorrection = - addot / expand.a[0] + +# effective masses of scalar fields +from pymbolic import var +from pymbolic.mapper.evaluator import evaluate_kw +fields = [var('f0')[i] for i in range(nscalars)] +d2Vd2f = [ps.diff(potential(fields), field, field) for field in fields] +eff_mass = [evaluate_kw(x, f0=f0) + hubbleCorrection for x in d2Vd2f] + +modes = ps.RayleighGenerator(ctx, fft, dk, volume, seed=13298*(decomp.rank+1)) + +for fld in range(nscalars): + modes.init_WKB_fields(f[0, fld], dfdt[0, fld], norm=mphi**2, + omega_k=lambda k: np.sqrt(k**2 + eff_mass[fld]), + hubble=expand.hubble[0]) + +for i in range(nscalars): + f[0, i] += f0[i] + dfdt[0, i] += df0[i] + +# re-initialize energy and expansion +energy = compute_energy(f[0], dfdt[0], lap_f, expand.a[0]) +expand = ps.Expansion(energy['total'], Stepper, mpl=mpl) + +# output first slice +output(0, 0., f, dfdt, lap_f, energy, expand) + +# evolution +t = 0. 
+step_count = 0 + +if decomp.rank == 0: + print(energy) + +from time import time +start = time() +last_out = time() + +while t < end_time and expand.a[0] < end_scale_factor: + for s in range(stepper.num_stages): + stepper(s, queue=queue, a=expand.a, hubble=expand.hubble, + f=f, dfdt=dfdt, lap_f=lap_f) + expand.step(s, energy['total'], energy['pressure'], dt) + q = 0 if s == 3 else 1 + energy = compute_energy(f[q], dfdt[q], lap_f, expand.a[q]) + + t += dt + step_count += 1 + output(step_count, t, f, dfdt, lap_f, energy, expand) + if time() - last_out > 6: + last_out = time() + ms_per_step = (last_out - start) * 1e3 / step_count + if decomp.rank == 0: + print(t, expand.a[0], ms_per_step, 1e3/ms_per_step, sep='\t') diff --git a/examples/wave-equation.py b/examples/wave-equation.py new file mode 100644 index 0000000..2bf507a --- /dev/null +++ b/examples/wave-equation.py @@ -0,0 +1,45 @@ +import pyopencl as cl +import pyopencl.array as cla +import pyopencl.clrandom as clr +import pystella as ps + +# set parameters +grid_shape = (128, 128, 128) +proc_shape = (1, 1, 1) +rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape)) +h = 1 +dtype = 'float64' +dx = tuple(10 / Ni for Ni in grid_shape) +dt = dx[0] / 10 + +# create pyopencl context, queue, and halo-sharer +ctx = ps.choose_device_and_make_context() +queue = cl.CommandQueue(ctx) +decomp = ps.DomainDecomposition(proc_shape, h, rank_shape) + +# initialize arrays with random data +f = clr.rand(queue, tuple(ni + 2 * h for ni in rank_shape), dtype) +dfdt = clr.rand(queue, tuple(ni + 2 * h for ni in rank_shape), dtype) +lap_f = cla.zeros(queue, rank_shape, dtype) +# temporary array for low-storage integrator +k_tmp = cla.zeros(queue, (2,) + rank_shape, dtype) + +# define system of equations +f_ = ps.DynamicField('f', offset='h') # don't overwrite f +rhs_dict = { + f_: f_.dot, # df/dt = \dot{f} + f_.dot: f_.lap # d\dot{f}/dt = \nabla^2 f +} +args = ps.get_field_args(rhs_dict) # infer argument information from 
rhs_dict + +# create time-stepping and derivative-computing kernels +stepper = ps.LowStorageRK54(rhs_dict, k_tmp, args=args, dt=dt, h=h) +derivs = ps.GradientLaplacian(decomp, h, dx) + +t = 0. +# loop over time +while t < 10.: + for s in range(stepper.num_stages): + derivs(queue, fx=f, lap=lap_f) + stepper(s, queue=queue, f=f, dfdt=dfdt, lap_f=lap_f) + t += dt diff --git a/pystella/__init__.py b/pystella/__init__.py new file mode 100644 index 0000000..c259bf7 --- /dev/null +++ b/pystella/__init__.py @@ -0,0 +1,136 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + + +from pystella.field import Field, DynamicField, Indexer, diff, get_field_args +from pystella.sectors import Sector, ScalarSector, TensorPerturbationSector +from pystella.elementwise import ElementWiseMap +from pystella.stencil import Stencil, StreamingStencil +from pystella.reduction import Reduction, FieldStatistics +from pystella.step import (RungeKutta4, RungeKutta3SSP, RungeKutta3Heun, + RungeKutta3Nystrom, RungeKutta3Ralston, + RungeKutta2Midpoint, RungeKutta2Ralston, LowStorageRK54, + LowStorageRK3Williamson, LowStorageRK3Inhomogeneous, + LowStorageRK3SSP) +from pystella.derivs import GradientLaplacian +from pystella.decomp import DomainDecomposition +from pystella.expansion import Expansion +from pystella.fourier import (DFT, RayleighGenerator, Projector, PowerSpectra, + SpectralGradientLaplacian) + +from loopy import set_caching_enabled +set_caching_enabled(True) + + +def choose_device_and_make_context(platform_choice=None, device_choice=None): + """ + A wrapper to choose a device and create a :class:`pyopencl.Context` on + a particular device. + + :arg platform_choice: An integer specifying which element of the + :class:`list` returned by :func:`pyopencl.get_platforms` to choose. + Defaults to *None*, in which case an NVIDIA platform is preferred. + If one is not found, then the first platform is chosen. + + :arg device_choice: An integer specifying which device to run on. + Defaults to *None*, in which case a device is chosen according to any + available environment variable defining the local MPI rank (defaulting to 0). + Currently only looks for OpenMPI and MVAPICH environment variables. + + :returns: A :class:`pyopencl.Context`. 
+ """ + + import pyopencl as cl + + # look for NVIDIA platform + platform = None + platforms = cl.get_platforms() + if platform_choice is None: + for i, plt in enumerate(platforms): + if 'NVIDIA' in plt.name: + platform = plt + platform = platform or platforms[0] + else: + platform = platforms[platform_choice] + + devices = platform.get_devices() + try: + # sort devices based on their unique pci bus id + devices = sorted(devices, key=lambda dev: dev.pci_bus_id_nv) + except: # noqa + pass + num_devices = len(devices) + + import os + local_rank = int(os.getenv('OMPI_COMM_WORLD_LOCAL_RANK', + os.getenv('MV2_COMM_WORLD_LOCAL_RANK', 0))) + choice = device_choice or (local_rank % num_devices) + + return cl.Context([devices[choice]]) + + +class DisableLogging(): # silence logging warning + def __enter__(self): + import logging + self.original_level = logging.getLogger().getEffectiveLevel() + logging.disable(logging.CRITICAL) + + def __exit__(self, exception_type, exception_value, traceback): + import logging + logging.disable(self.original_level) + + +__all__ = [ + "Field", + "DynamicField", + "Indexer", + "diff", + "get_field_args", + "Sector", + "ScalarSector", + "TensorPerturbationSector", + "ElementWiseMap", + "RungeKutta4", + "RungeKutta3SSP", + "RungeKutta3Heun", + "RungeKutta3Nystrom", + "RungeKutta3Ralston", + "RungeKutta2Midpoint", + "RungeKutta2Ralston", + "LowStorageRK54", + "LowStorageRK3Williamson", + "LowStorageRK3Inhomogeneous", + "LowStorageRK3SSP", + "Stencil", + "StreamingStencil", + "GradientLaplacian", + "Reduction", + "FieldStatistics", + "DomainDecomposition", + "Expansion", + "DFT", + "RayleighGenerator", + "Projector", + "PowerSpectra", + "SpectralGradientLaplacian", + "choose_device_and_make_context", +] diff --git a/pystella/decomp.py b/pystella/decomp.py new file mode 100644 index 0000000..9f03fcb --- /dev/null +++ b/pystella/decomp.py @@ -0,0 +1,585 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is 
hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import pyopencl.array as cla +import loopy as lp + + +class DomainDecomposition: + """ + Implements functions needed for the MPI domain decomposition of a 3D grid. + + If :mod:`mpi4py` is not installed, then only single-rank operation is supported. + + .. automethod:: __init__ + .. automethod:: share_halos + .. automethod:: remove_halos + .. automethod:: gather_array + .. automethod:: restore_halos + .. automethod:: scatter_array + .. automethod:: rankID + .. autoattribute:: rank_tuple + .. automethod:: bcast + .. automethod:: allreduce + + .. attribute:: comm + + An :class:`mpi4py.MPI.COMM_WORLD` if :mod:`mpi4py` is installed, else *None*. + + .. attribute:: rank + + The integral rank of the calling process, i.e., that returned by + :meth:`mpi4py.MPI.COMM_WORLD.Get_rank`. + + .. attribute:: nranks + + The total number of ranks, i.e., that returned by + :meth:`mpi4py.MPI.COMM_WORLD.Get_size`. + + .. attribute:: proc_shape + + .. 
attribute:: rank_shape + """ + + def __init__(self, proc_shape, h, rank_shape=None): + """ + :arg queue: The :class:`pyopencl.CommandQueue` to enqueue kernels and copies. + + :arg proc_shape: A 3-:class:`tuple` specifying the shape of the MPI + processor grid. + + .. note:: + + Currently, ``proc_shape[2]`` must be ``1``, i.e., only + two-dimensional domain decompositions are supported. + + :arg h: The number of halo padding layers on each face of the numerical grid. + + The following keyword arguments are recognized: + + :arg rank_shape: A 3-:class:`tuple` specifying the shape of the computational + sub-grid on the calling process. + Defaults to *None*, in which case the global size is not fixed (and + will be inferred when, e.g., :meth:`share_halos` is called, at a slight + performance penalty). + + :raises NotImplementedError: if ``proc_shape[2] != 1``. + + :raises ValueError: if the size of the processor grid + ``proc_shape[0] * proc_shape[1] * proc_shape[2]`` is not equal to the + total number of ranks the application was launched with + (i.e., that returned by :func:`mpi4py.MPI.COMM_WORLD.Get_size()`). 
+ """ + + self.proc_shape = proc_shape + self.h = h + self.buffer_arrays = {} + self.rank_shape = rank_shape + + try: + from mpi4py import MPI + self.comm = MPI.COMM_WORLD + self.rank = self.comm.Get_rank() + self.nranks = self.comm.Get_size() + except ModuleNotFoundError: + self.comm = None + self.rank = 0 + self.nranks = 1 + + if proc_shape[2] != 1: + raise NotImplementedError("decomposition in z not yet supported") + + if proc_shape[0] * proc_shape[1] * proc_shape[2] != self.nranks: + raise ValueError( + "%s is an invalid decomposition for %d ranks" + % (str(proc_shape), self.nranks)) + + self.rz = self.rank % proc_shape[2] + self.ry = (self.rank - self.rz) // proc_shape[2] % proc_shape[1] + self.rx = (self.rank - self.rz - proc_shape[2] * self.ry) // proc_shape[1] + + params_to_fix = {'h': self.h} + if self.rank_shape is not None: + for k, v in zip(('Nx', 'Ny', 'Nz'), self.rank_shape): + params_to_fix[k] = v + + pencil_shape_str = "(Nx+2*h, Ny+2*h, Nz+2*h)" + + def x_comm_knl(instructions): + knl = lp.make_kernel( + "[Ny, Nz, h] \ + -> { [i,j,k]: 0<=i { [i,j,k]: 0<=i { [i,j,k]: 0<=i { [i,j,k]: 0<=i>> f = Field('f', offset='h') + >>> coefs = {(1, 0, 0): 1, (-1, 0, 0): -1} + >>> stencil = expand_stencil(f, coefs) + >>> print(Indexer(stencil)) + f[i + h + 1, j + h, k + h] + (-1)*f[i + h + -1, j + h, k + h] + """ + + return sum([c * f.shift(offset) for offset, c in coefs.items()]) + + +def centered_diff(f, coefs, direction, order): + """ + A convenience wrapper to :func:`expand_stencil` for computing centered + differences. By assuming the symmetry of the stencil (which has parity given + by the parity of ``order``), no redundant coefficients need to be supplied. + Further, by supplying the ``direction`` parameter, the input offset (the keys + of ``coefs``) need only be integers. + + :arg f: A :class:`~pystella.Field`. + + :arg coefs: A :class:`dict` whose values are the coefficients of the stencil + at an offset given by the key. 
The keys must be integers, and the + values may be :mod:`pymbolic` expressions or constants. Only + non-redundant ``(offset, coefficient)`` pairs are needed. + + :arg direction: An integer in ``(0, 1, 2)`` denoting the direction over which + to expand the stencil (i.e., to apply the offset). + + :arg order: The order of the derivative being computed, which determines + whether coefficients at the opposite offset have the same or opposite + sign. + + Example:: + + >>> f = Field('f', offset='h') + >>> coefs = {1: 1} + >>> stencil = centered_diff(f, coefs, 0, 1) + >>> print(Indexer(stencil)) + f[i + h + 1, j + h, k + h] + (-1)*f[i + h + -1, j + h, k + h] + """ + + all_coefs = {} + for s, c in coefs.items(): + offset = [0, 0, 0] + + # skip central point (s == 0) for odd order + if s != 0 or order % 2 == 0: + offset[direction-1] = s + all_coefs[tuple(offset)] = c + + # add the opposite point + if s != 0: + offset[direction-1] = - s + all_coefs[tuple(offset)] = (-1)**order * c + + return expand_stencil(f, all_coefs) + + +class FiniteDifferenceStencil: + coefs = NotImplemented + truncation_order = NotImplemented + order = NotImplemented + is_centered = NotImplemented + + def __call__(self, f, direction): + if self.is_centered: + return centered_diff(f, self.coefs, direction, self.order) + else: + return expand_stencil(f, self.coefs) + + def get_eigenvalues(self, k, dx): + raise NotImplementedError + + +_grad_coefs = {} +_grad_coefs[1] = {1: 1/2} +_grad_coefs[2] = {1: 8/12, 2: -1/12} +_grad_coefs[3] = {1: 45/60, 2: -9/60, 3: 1/60} +_grad_coefs[4] = {1: 672/840, 2: -168/840, 3: 32/840, 4: -3/840} + + +class FirstCenteredDifference(FiniteDifferenceStencil): + def __init__(self, h): + self.coefs = _grad_coefs[h] + self.truncation_order = 2 * h + self.order = 1 + self.is_centered = True + + def get_eigenvalues(self, k, dx): + import numpy as np + th = k * dx + if self.truncation_order == 2: + return np.sin(th) / dx + if self.truncation_order == 4: + return (8 * np.sin(th) - 
np.sin(2 * th)) / (6 * dx) + if self.truncation_order == 6: + return (45 * np.sin(th) - 9 * np.sin(2 * th) + + np.sin(3 * th) + ) / (30 * dx) + if self.truncation_order == 8: + return (672 * np.sin(th) - 168 * np.sin(2 * th) + + 32 * np.sin(3 * th) - 3 * np.sin(4 * th) + ) / (420 * dx) + else: + return k + + +_lap_coefs = {} +_lap_coefs[1] = {0: -2, 1: 1} +_lap_coefs[2] = {0: -30/12, 1: 16/12, 2: -1/12} +_lap_coefs[3] = {0: -490/180, 1: 270/180, 2: -27/180, 3: 2/180} +_lap_coefs[4] = {0: -14350/5040, 1: 8064/5040, 2: -1008/5040, + 3: 128/5040, 4: -9/5040} + + +class SecondCenteredDifference(FiniteDifferenceStencil): + def __init__(self, h): + self.coefs = _lap_coefs[h] + self.truncation_order = 2 * h + self.order = 2 + self.is_centered = True + + def get_eigenvalues(self, k, dx): + import numpy as np + th = k * dx + if self.truncation_order == 2: + return (2 * np.cos(th) - 2) / dx**2 + elif self.truncation_order == 4: + return (32 * np.cos(th) - 2 * np.cos(2 * th) - 30) / (12 * dx**2) + elif self.truncation_order == 6: + return (90 * np.cos(th) - 9 * np.cos(2 * th) + + 2/3 * np.cos(3 * th) - 245/3 + ) / (30 * dx**2) + elif self.truncation_order == 8: + return (1344 * np.cos(th) - 168 * np.cos(2 * th) + + 64/3 * np.cos(3 * th) - 3/2 * np.cos(4 * th) - 7175/6 + ) / (420 * dx**2) + else: + return - k**2 + + +class GradientLaplacian: + """ + A convenience class for generating kernels which compute spatial gradients, + Laplacians, and combinations thereof. + + See :class:`SpectralGradientLaplacian` for a version of this + class implementing spectral collocation. + + .. automethod:: __init__ + .. automethod:: __call__ + """ + + def __init__(self, decomp, h, dx, **kwargs): + """ + The following arguments are required: + + :arg decomp: An instance of :class:`DomainDecomposition`. + + :arg h: The number of halo padding layers on each face of the numerical grid. + + :arg dx: A 3-:class:`tuple` specifying the grid spacing of each axis. 
+ + The following keyword-only arguments are recognized: + + :arg first_stencil: A :class:`callable` with signature + ``(f, direction)`` where f is a :class:`Field` and ``direction`` + indicates the spatial axis (1, 2, or 3) along which the stencil is taken, + returning the (symbolic) first-order stencil. + Defaults to the centered-difference of the highest order allowed + by the amount of array padding (set by :attr:`h`). + See :func:`~pystella.derivs.expand_stencil`. + + :arg second_stencil: Like ``first_stencil``, but for the second-order + differences. + + :arg rank_shape: A 3-:class:`tuple` specifying the global size of every + kernel call. + Defaults to *None*, in which case the global size is not fixed (and + will be inferred when the kernel is called, at a slight performance + penalty). + + .. ifconfig:: not on_rtd + + :arg stream: Whether to use :class:`StreamingStencil`. + Defaults to *False*. + """ + + self.decomp = decomp + stream = kwargs.pop('stream', False) + first_stencil = kwargs.pop('first_stencil', FirstCenteredDifference(h)) + second_stencil = kwargs.pop('second_stencil', SecondCenteredDifference(h)) + rank_shape = kwargs.pop('rank_shape', None) + + args = [lp.GlobalArg('fx', shape="(Nx+2*h, Ny+2*h, Nz+2*h)", offset=lp.auto)] + + fx = Field('fx', offset='h') + pd = tuple(Field(pdi) for pdi in ('pdx', 'pdy', 'pdz')) + pdx, pdy, pdz = ({pdi: first_stencil(fx, i+1) * (1/dxi)} + for i, (pdi, dxi) in enumerate(zip(pd, dx))) + lap = {Field('lap'): sum(second_stencil(fx, i+1) * dxi**-2 + for i, dxi in enumerate(dx))} + + self.pdx_knl = Stencil(pdx, args=args, prefetch_args=['fx'], + lsize=(16, 2, 16), h=h, rank_shape=rank_shape) + self.pdy_knl = Stencil(pdy, args=args, prefetch_args=['fx'], + lsize=(16, 16, 2), h=h, rank_shape=rank_shape) + self.pdz_knl = Stencil(pdz, args=args, prefetch_args=['fx'], + lsize=(64, 2, 2), h=h, rank_shape=rank_shape) + + if stream: + lsize = {h_: (16, 4, 8) for h_ in range(1, 5)} + else: + lsize = {1: (8, 8, 8), 2: (8, 4, 
4), 3: (4, 4, 4), 4: (2, 2, 2)} + + SS = StreamingStencil if stream else Stencil + self.lap_knl = SS(lap, args=args, prefetch_args=['fx'], + lsize=lsize[h], h=h, rank_shape=rank_shape) + + self.grad_knl = SS({**pdx, **pdy, **pdz}, args=args, prefetch_args=['fx'], + lsize=lsize[h], h=h, rank_shape=rank_shape) + + self.grad_lap_knl = SS({**pdx, **pdy, **pdz, **lap}, + args=args, prefetch_args=['fx'], + lsize=lsize[h], h=h, rank_shape=rank_shape) + + def __call__(self, queue, fx, *, + lap=None, pdx=None, pdy=None, pdz=None, grd=None): + """ + Computes requested derivatives of the input ``fx``. + + :arg fx: The array to compute derivatives of. Halos are shared using + :meth:`DomainDecomposition.share_halos`, and a kernel is called + based on what combination of the remainin input arguments are not *None*. + + Valid combinations are + + * all of ``lap``, ``pdx``, ``pdy``, and ``pdz`` + (or equivalently ``lap`` and ``grd``) + + * any single one of ``lap``, ``pdx``, ``pdy``, or ``pdz`` + + * only ``pdx``, ``pdy``, and ``pdz`` + (or equivalently only ``grd``) + + If ``fx`` has shape ``(...,) + (rank_shape+2*h)``, all the + outermost indices (i.e., in place of ``...``) are looped over. + As an example, with ``h=1``:: + + >>> fx.shape, lap.shape + ((2, 3, 130, 130, 130), (2, 3, 128, 128, 128)) + >>> derivs(queue, fx=fx, lap=lap) + + would loop over the outermost two axes with shape ``(2, 3)``. + Note that the shapes of ``fx`` and ``lap`` (or in general all input + arrays) must match on these outer axes. + + :arg lap: The array which will store the Laplacian of ``fx``. + Defaults to *None*. + + :arg pdx: The array which will store the :math:`x`-derivative of ``fx``. + Defaults to *None*. + + :arg pdy: The array which will store the :math:`y`-derivative of ``fx``. + Defaults to *None*. + + :arg pdz: The array which will store the :math:`z`-derivative of ``fx``. + Defaults to *None*. 
+ + :arg grd: The array containing the gradient of ``fx``, i.e., all three of + ``pdx``, ``pdy``, and ``pdz``. + If supplied, any input values to ``pdx``, ``pdy``, or ``pdz`` are + ignored and replaced via :: + + pdx = grd[..., 0, :, :, :] + pdy = grd[..., 1, :, :, :] + pdz = grd[..., 2, :, :, :] + + Defaults to *None*. + + :returns: The :class:`pyopencl.Event` associated with the kernel + invocation (i.e., of the last called kernel if multiple axes are + being looped over). + """ + + from itertools import product + slices = list(product(*[range(n) for n in fx.shape[:-3]])) + + if grd is not None: + pdx = grd[..., 0, :, :, :] + pdy = grd[..., 1, :, :, :] + pdz = grd[..., 2, :, :, :] + + for s in slices: + self.decomp.share_halos(queue, fx[s]) + if (lap is not None and pdx is not None + and pdy is not None and pdz is not None): + evt, _ = self.grad_lap_knl(queue, fx=fx[s], lap=lap[s], + pdx=pdx[s], pdy=pdy[s], pdz=pdz[s]) + elif pdx is not None and pdy is not None and pdz is not None: + evt, _ = self.grad_knl(queue, fx=fx[s], + pdx=pdx[s], pdy=pdy[s], pdz=pdz[s]) + elif lap is not None: + evt, _ = self.lap_knl(queue, fx=fx[s], lap=lap[s]) + elif pdx is not None: + evt, _ = self.pdx_knl(queue, fx=fx[s], pdx=pdx[s]) + elif pdy is not None: + evt, _ = self.pdy_knl(queue, fx=fx[s], pdy=pdy[s]) + elif pdz is not None: + evt, _ = self.pdz_knl(queue, fx=fx[s], pdz=pdz[s]) + + return evt diff --git a/pystella/elementwise.py b/pystella/elementwise.py new file mode 100644 index 0000000..7f17b78 --- /dev/null +++ b/pystella/elementwise.py @@ -0,0 +1,220 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit 
persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import loopy as lp +from pystella.field import Indexer +import pymbolic.primitives as pp + +__doc__ = """ +.. currentmodule:: pystella +.. autoclass:: ElementWiseMap +""" + + +class ElementWiseMap: + """ + An interface to :func:`loopy.make_kernel`, which creates a kernel + with parallelization suitable for operations which are "local"--namely, + element-wise maps where each workitem (thread) only accesses one element + of global arrays. + + .. automethod:: __init__ + .. 
automethod:: __call__ + """ + + def parallelize(self, knl, lsize): + knl = lp.split_iname(knl, "k", lsize[0], outer_tag="g.0", inner_tag="l.0") + knl = lp.split_iname(knl, "j", lsize[1], outer_tag="g.1", inner_tag="l.1") + knl = lp.split_iname(knl, "i", lsize[2], outer_tag="g.2", inner_tag="unr") + return knl + + def _assignment(self, assignee, expression, **kwargs): + no_sync_with = kwargs.pop('no_sync_with', [('*', 'any')]) + return lp.Assignment(assignee, expression, + no_sync_with=no_sync_with, + **kwargs) + + def make_kernel(self, map_dict, tmp_dict, args, **kwargs): + temp_statements = [] + temp_vars = [] + for assignee, expression in tmp_dict.items(): + # only declare temporary variables once + if isinstance(assignee, pp.Variable): + current_tmp = assignee + elif isinstance(assignee, pp.Subscript): + current_tmp = assignee.aggregate + else: + current_tmp = None + if current_tmp is not None and current_tmp not in temp_vars: + temp_vars += [current_tmp] + temp_var_type = lp.Optional(None) + else: + temp_var_type = lp.Optional() + + stmnt = self._assignment(Indexer(assignee), Indexer(expression), + temp_var_type=temp_var_type) + temp_statements += [stmnt] + + output_statements = [] + for assignee, expression in map_dict.items(): + stmnt = self._assignment(Indexer(assignee), Indexer(expression)) + output_statements += [stmnt] + + options = kwargs.pop('options', lp.Options()) + # ignore lack of supposed dependency for single-instruction kernels + if len(map_dict) + len(tmp_dict) == 1: + setattr(options, 'check_dep_resolution', False) + + knl = lp.make_kernel( + "[Nx, Ny, Nz] -> {[i,j,k]: 0<=i>> f = Field('f', offset='h') + >>> print(Indexer(f)) + f[i + h, j + h, k + h] + >>> print(Indexer(f[0])) + f[0, i + h, j + h, k + h] + + See `test_field.py + `_ + for more examples of + the intended functionality. + + .. attribute:: child + + The child expression representing the un-subscripted field. Can be input + as a string or a :class:`pymbolic.primitives.Expression`. 
+ + .. attribute:: name + + The name of the :class:`Field` instance, i.e., as would appear in + a generated kernel. Defaults to ``str(child)``. + + .. attribute:: indices + + A tuple of (symbolic) array indices that will subscript the array. Each + entry may be a :class:`pymbolic.primitives.Variable` or a string which + parses to one. Defaults to ``('i', 'j', 'k')`` + + .. attribute:: offset + + The amount of padding by which to offset the array axes corresponding to + the elements of :attr:`indices`. May be a tuple with the same length as + :attr:`indices` or a single value. In the latter case, the input is + transformed into a tuple with the same length as :attr:`indices`, each with + the same value. Defaults to ``0``. + + .. attribute:: ignore_prepends + + Whether to ignore array subscripts prepended when processed with + :func:`Indexer`. Useful for timestepping kernels which prepend array indices + corresponding to extra storage axes (to specify that an array does not have + this axis). Defaults to *False*. + """ + + def __init__(self, child, name=None, offset=0, indices=('i', 'j', 'k'), + ignore_prepends=False): + self.child = parse_if_str(child) + self.name = name if isinstance(name, str) else str(child) + + if not isinstance(offset, (list, tuple)): + offset = (offset,)*len(indices) + if len(offset) != len(indices): + raise ValueError('offset and indices must have same length') + + self.offset = tuple(parse_if_str(o) for o in offset) + self.indices = tuple(parse_if_str(i) + off + for i, off in zip(indices, self.offset)) + + self.ignore_prepends = ignore_prepends + + def __getinitargs__(self): + return (self.child, self.indices, self.name, self.ignore_prepends) + + mapper_method = "map_field" + + def make_stringifier(self, originating_stringifier=None): + # FIXME: do something with originating_stringifier? 
+ return FieldStringifyMapper() + + def shift(self, vec): + return Field(self.child, self.name, offset=vec, indices=self.indices, + ignore_prepends=self.ignore_prepends) + + +class FieldStringifyMapper(StringifyMapper): + def map_field(self, expr, enclosing_prec, *args, **kwargs): + if expr.name is not None: + return self.rec(parse(expr.name), enclosing_prec, *args, **kwargs) + else: + return self.rec(expr.child, enclosing_prec, *args, **kwargs) + + map_dynamic_field = map_field + + +class DynamicField(Field): + """ + A subclass of :class:`Field` which also contains associated :class:`Field` + instances representing various derivatives of the base :class:`Field`. + + .. attribute:: dot + + A :class:`Field` representing the time derivative of the base + :class:`Field`. It shares the same :attr:`indices` and :attr:`offset` + as the base :class:`Field`. Its name defaults to ``d{self.child}dt``, + but may be specified via the argument ``dot_child``. + + .. attribute:: lap + + A :class:`Field` representing the Laplacian of the base + :class:`Field`. It shares the same :attr:`indices` as the base + :class:`Field` but with ``offset = 0``. Its name defaults to + ``lap_{self.child}``, but may be specified via the argument + ``lap_child``. + + .. attribute:: pd + + A :class:`Field` representing the spatial derivative(s) of the base + :class:`Field`. It shares the same :attr:`indices` as the base + :class:`Field` but with ``offset = 0``. Its name defaults to + ``d{self.child}dx``, but may be specified via the argument + ``pd_child``. + + .. 
automethod:: d + + """ + + def __init__(self, child, name=None, offset='0', indices=('i', 'j', 'k'), + dot_child=None, lap_child=None, pd_child=None): + super().__init__(child, name, offset, indices) + + self.dot = Field(dot_child if dot_child is not None else 'd' + child + 'dt', + 'd' + self.name + 'dt', + offset, indices=indices) + + self.lap = Field(lap_child if lap_child is not None else 'lap_' + child, + 'lap_' + self.name, + offset='0', indices=indices, ignore_prepends=True) + + self.pd = Field(pd_child if pd_child is not None else 'd' + child + 'dx', + 'd' + self.name + 'dx', + offset='0', indices=indices, ignore_prepends=True) + + def d(self, *args): + """ + Returns the (subscripted) derivative of the base :class:`Field`, i.e., + either :attr:`dot` or :attr:`pd` with the appropriate index. + + For example, the "time" derivative of a field would be + + >>> f = DynamicField('f') + >>> print(f.d(0)) # x^0 = "time" + dfdt + + Additional arguments are interpreted as subscripts to the resulting array; + the final argument corresponds to the coordinate being differentiated with + respect to. + + >>> print(f.d(1, 2, 0)) + dfdt[1, 2] + + Spatial indices ``1`` through ``3`` denote spatial derivatives (whose + array subscripts are ``0`` through ``2``). 
+ + >>> print(f.d(2)) # x^2 = y + dfdx[1] + >>> print(f.d(0, 1, 3)) # x^3 = z + dfdx[0, 1, 2] + + """ + mu = args[-1] + indices = args[:-1]+(mu-1,) + return self.dot[args[:-1]] if mu == 0 else self.pd[indices] + + def __getinitargs__(self): + return (self.child, self.indices, self.name, self.dot, self.lap, self.pd) + + mapper_method = "map_dynamic_field" + + +class IndexMapper(IdentityMapper): + def parse_prepend(self, pre_index): + if isinstance(pre_index, str): + pre_index = (parse(pre_index),) + if isinstance(pre_index, pp.Variable): + pre_index = (pre_index,) + return pre_index + + def map_field(self, expr, *args, **kwargs): + if expr.ignore_prepends: + pre_index = () + else: + pre_index = self.parse_prepend(kwargs.pop('prepend_with', ())) + + if isinstance(expr.child, pp.Subscript): + x = pp.Subscript(expr.child.aggregate, + pre_index + expr.child.index_tuple + expr.indices) + elif isinstance(expr.child, pp.Variable): + full_index = pre_index + expr.indices + if full_index == (): + x = expr.child + else: + x = pp.Subscript(expr.child, pre_index + expr.indices) + else: + x = expr + return self.rec(x) + + map_dynamic_field = map_field + + def map_subscript(self, expr, *args, **kwargs): + if isinstance(expr.aggregate, Field): + pre_index = () if expr.aggregate.ignore_prepends \ + else self.parse_prepend(kwargs.pop('prepend_with', ())) + + a = self.rec(expr.aggregate) + if isinstance(a, pp.Subscript): + agg = a.aggregate + full_index = pre_index + expr.index_tuple + a.index_tuple + else: + agg = a + full_index = pre_index + expr.index_tuple + + if full_index == (): + x = agg + else: + x = pp.Subscript(agg, full_index) + return self.rec(x) + else: + return IdentityMapper.map_subscript(self, expr, *args, **kwargs) + + def map_lookup(self, expr, *args, **kwargs): + return self.rec(pp.Variable(expr.name)) + + +#: An instance of :class:`IndexMapper` which appends indices to :class:`Field` +#: instances in an expression, turning them into ordinary +#: 
:class:`pymbolic.primitives.Subscript`'s. +#: See the documentation of :class:`Field` for examples. +#: +#: :arg expr: The :mod:`pymbolic` expression to be mapped. +#: +#: :arg prepend_with: A :class:`tuple` of indices to prepend to the subscript +#: of any :class:`Field`'s in ``expr`` (unless a given :class:`Field` has +#: :attr:ignore_prepends` set to *False*. Defaults to an empty :class:`tuple`. +Indexer = IndexMapper() + +from pymbolic.mapper import Collector + + +class FieldCollector(Collector): + def map_field(self, expr): + return set([expr]) + + map_dynamic_field = map_field + + +def get_field_args(expressions, unpadded_shape=None): + """ + A :class:`pymbolic.Collector` which collects all :class:`~pystella.Field`'s + from ``expressions`` and returns a corresponding list of + :class:`loopy.ArrayArg`'s, using information about array indexing offsets + to produce + + .. warning:: + + This method currently does not correctly process + :class:`~pystella.Field`'s which are subscripted (i.e., nested + inside a :class:`pymbolic.primitives.Subscript`). + That is, it disregards any information about outer axes as represented + by subscripting. + + :arg expressions: The expressions from which to collect + :class:`~pystella.Field`'s. + May be one of the following: + + * A :class:`dict`, in which case all keys and values are iterated over. + + * A :class:`list`, in which case all elements are iterated over. + + * A :class:`pymbolic.primitives.Expression`. + + The following keyword arguments are recognized: + + :arg unpadded_shape: The shape of :class:`~pystella.Field`'s in ``expressions`` + (sans padding). + Defaults to ``(Nx, Ny, Nz)``. + + :returns: A :class:`list` of :class:`loopy.ArrayArg`'s. 
+ + Example:: + + >>> f = Field('f', offset='h) + >>> get_field_args(f) + [, shape: (Nx + 2*h, Ny + 2*h, Nz + 2*h) + aspace: global>] + """ + + all_exprs = [] + if isinstance(expressions, dict): + for k, v in expressions.items(): + all_exprs.append(k) + all_exprs.append(v) + elif isinstance(expressions, list): + all_exprs = expressions + else: + all_exprs = [expressions] + + if unpadded_shape is None: + unpadded_shape = parse('Nx, Ny, Nz') + + from loopy import GlobalArg + + fields = FieldCollector()(all_exprs) + args = [] + for f in fields: + shape = tuple(N + 2 * h for N, h in zip(unpadded_shape, f.offset)) + args.append(GlobalArg(f.child.name, shape=shape)) + + return sorted(args, key=lambda f: f.name) + + +__all__ = [ + "Field", + "DynamicField", + "Indexer", + "diff", + "get_field_args", + # "pymbolic_to_sympy", + # "sympy_to_pymbolic", + # "simplify", +] diff --git a/pystella/field/diff.py b/pystella/field/diff.py new file mode 100644 index 0000000..a03ce47 --- /dev/null +++ b/pystella/field/diff.py @@ -0,0 +1,89 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import pymbolic.primitives as pp +from pymbolic.mapper.differentiator import DifferentiationMapper +from pymbolic import var + + +class FieldDifferentiationMapper(DifferentiationMapper): + def __init__(self, variable, xmu=None): + if xmu is not None: + self.xmu = xmu + else: + self.xmu = {var('t'): 0, var('x'): 1, var('y'): 2, var('z'): 3} + super().__init__(variable) + + map_field = DifferentiationMapper.map_variable + + def map_dynamic_field(self, expr, *args): + if self.variable in self.xmu: + return expr.d(*args, self.xmu[self.variable]) + else: + return self.map_field(expr, *args) + + def map_subscript(self, expr, *args): + from pystella.field import DynamicField + if isinstance(expr.aggregate, DynamicField) and self.variable in self.xmu: + return self.rec(expr.aggregate, *expr.index_tuple) + else: + return super().map_subscript(expr, *args) + + def map_if(self, expr, *args): + from pymbolic.primitives import If + return If(expr.condition, self.rec(expr.then), self.rec(expr.else_)) + + +def diff(f, *x): + """ + A differentiator which computes ``\\partial f / \\partial x`` and understands + :class:`Field`'s. If ``x`` is one of ``t``, ``x``, ``y``, or ``z`` and ``f`` + is a :class:`DynamicField`, the corresponding derivative :class:`Field` is + returned. + + Examples:: + + >>> f = DynamicField('f') + >>> print(diff(f**3, f)) + 3*f**2 + >>> print(diff(f**3, f, f)) + 3*2*f + >>> print(diff(f**3, 't')) + 3*f**2*dfdt + >>> print(diff(f**3, f, 't')) + 3*2*f*dfdt + >>> print(diff(f + 2, 'x')) + dfdx[0] + + :arg f: A :mod:`pymbolic` expression to be differentiated. + + :arg x: A :class:`pymbolic.primitives.Expression` or a string to be parsed + (or multiple thereof). 
If multiple positional arguments are provided, + derivatives are taken with respect to each in order. + (See the examples above.) + """ + + if len(x) > 1: + return diff(diff(f, x[0]), *x[1:]) + else: + return FieldDifferentiationMapper(pp.make_variable(x[0]))(f) diff --git a/pystella/field/sympy.py b/pystella/field/sympy.py new file mode 100644 index 0000000..dd1b523 --- /dev/null +++ b/pystella/field/sympy.py @@ -0,0 +1,143 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import sympy as sym +import pymbolic.primitives as pp +from pymbolic.interop.sympy import PymbolicToSympyMapper, SympyToPymbolicMapper + +__doc__ = """ +.. currentmodule:: pystella.field.sympy + +Sympy interoperability +^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: pymbolic_to_sympy +.. autofunction:: sympy_to_pymbolic +.. 
autofunction:: simplify +""" + + +class SympyField(sym.Symbol): + def __new__(cls, field, **assumptions): + symb = super().__new__(cls, field.child.name, **assumptions) + symb.field = field + return symb + + +class PymbolicToSympyMapperWithField(PymbolicToSympyMapper): + def map_lookup(self, expr, *args, **kwargs): + return pp.Variable(expr.name) + + def map_call(self, expr): + function = self.rec(expr.function) + if isinstance(function, pp.Variable): + func_name = function.name + try: + func = getattr(self.sym.functions, func_name) + except AttributeError: + func = self.sym.Function(func_name) + return func(*[self.rec(par) for par in expr.parameters]) + else: + self.raise_conversion_error(expr) + + def map_field(self, expr): + return SympyField(expr) + + map_dynamic_field = map_field + + +class SympyToPymbolicMapperMathLookup(SympyToPymbolicMapper): + functions = {'exp', 'expm1', 'log', + 'sin', 'cos', 'tan', + 'sinh', 'cosh', 'tanh', + 'fabs', 'Abs', 'sign'} + + def map_Function(self, expr): + name = self.function_name(expr) + if name in self.functions: + args = tuple(self.rec(arg) for arg in expr.args) + + from pymbolic.primitives import Variable, Lookup + if name == 'Abs': + call = Lookup(Variable('math'), 'fabs') + elif name == 'sign': + call = Lookup(Variable('math'), 'copysign') + args = (1,)+args + else: + call = Lookup(Variable('math'), name) + return call(*args) + else: + return self.not_supported(expr) + + +class SympyToPymbolicMapperWithField(SympyToPymbolicMapperMathLookup): + def map_SympyField(self, expr): + return expr.field + + +#: A mapper which converts :class:`pymbolic.primitives.Expression`'s into +#: :mod:`sympy` expressions and understands :class:`~pystella.Field`'s. +#: The result can be converted back to a :class:`pymbolic.primitives.Expression` +#: with all :class:`~pystella.Field`'s in place, accomplished via a subclass +#: of :class:`sympy.Symbol` which retains a copy of the :class:`~pystella.Field`. 
+#: +#: :arg expr: The :mod:`pymbolic` expression to be mapped. +#: +pymbolic_to_sympy = PymbolicToSympyMapperWithField() + +#: A mapper which converts :mod:`sympy` expressions into +#: :class:`pymbolic.primitives.Expression`'s and understands the custom :mod:`sympy` +#: type used to represent :class:`~pystella.Field`'s by :func:`pymbolic_to_sympy`. +#: +#: :arg expr: The :mod:`sympy` expression to be mapped. +#: +sympy_to_pymbolic = SympyToPymbolicMapperWithField() + + +def simplify(expr, sympy_out=False): + """ + A wrapper to :func:`sympy.simplify`. + + :arg expr: The expression to be simplified. May either be a + :class:`pymbolic.primitives.Expression` or a :mod:`sympy` expression. + + The following keyword arguments are recognized: + + :arg sympy_out: A :class:`bool` determining whether to return the simplified + :mod:`sympy` expression or to first convert it to a + :class:`pymbolic.primitives.Expression`. + Defaults to *False*. + + :returns: A :class:`pymbolic.primitives.Expression` containing the + simplified form of ``expr`` if ``sympy_out`` is *False*, else a + :mod:`sympy` expression.
+ """ + + if isinstance(expr, pp.Expression): + expr = pymbolic_to_sympy(expr) + expr = sym.simplify(expr) + + if sympy_out: + return expr + else: + return sympy_to_pymbolic(expr) diff --git a/pystella/fourier/__init__.py b/pystella/fourier/__init__.py new file mode 100644 index 0000000..021c6a7 --- /dev/null +++ b/pystella/fourier/__init__.py @@ -0,0 +1,37 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +from pystella.fourier.dft import DFT, gDFT, pDFT +from pystella.fourier.rayleigh import RayleighGenerator +from pystella.fourier.projectors import Projector +from pystella.fourier.spectra import PowerSpectra +from pystella.fourier.derivs import SpectralGradientLaplacian + +__all__ = [ + "DFT", + "gDFT", + "pDFT", + "RayleighGenerator", + "Projector", + "PowerSpectra", + "SpectralGradientLaplacian", +] diff --git a/pystella/fourier/derivs.py b/pystella/fourier/derivs.py new file mode 100644 index 0000000..ba07e3e --- /dev/null +++ b/pystella/fourier/derivs.py @@ -0,0 +1,161 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import loopy as lp +import pyopencl.array as cla + +__doc__ = """ +.. currentmodule:: pystella +.. 
autoclass:: SpectralGradientLaplacian +""" + + +class SpectralGradientLaplacian: + """ + Interface (analogous to :class:`~pystella.GradientLaplacian`) + for computing spatial gradients via spectral collocation. + + .. automethod:: __init__ + .. automethod:: __call__ + """ + + def __init__(self, fft, dk): + """ + The following arguments are required: + + :arg fft: An FFT object as returned by :func:`~pystella.DFT`. + ``grid_shape`` and ``dtype`` are determined by ``fft``'s attributes. + + :arg dk: A 3-:class:`tuple` of the momentum-space grid spacing of each + axis (i.e., the infrared cutoff of the grid in each direction). + """ + + self.fft = fft + grid_size = fft.grid_shape[0] * fft.grid_shape[1] * fft.grid_shape[2] + + queue = self.fft.sub_k['momenta_x'].queue + sub_k = list(x.get().astype('int') for x in self.fft.sub_k.values()) + k_names = ('k_x', 'k_y', 'k_z') + self.momenta = {} + self.momenta = {} + for mu, (name, kk) in enumerate(zip(k_names, sub_k)): + kk_mu = dk[mu] * kk.astype(fft.dtype) + self.momenta[name+'_2'] = cla.to_device(queue, kk_mu) + + # zero Nyquist mode for first derivatives + kk_mu[abs(sub_k[mu]) == fft.grid_shape[mu]//2] = 0. + kk_mu[sub_k[mu] == 0] = 0.
+ self.momenta[name+'_1'] = cla.to_device(queue, kk_mu) + + args = [ + lp.GlobalArg('fk', shape="(Nx, Ny, Nz)"), + lp.GlobalArg("k_x_1, k_x_2", self.fft.dtype, shape=('Nx',)), + lp.GlobalArg("k_y_1, k_y_2", self.fft.dtype, shape=('Ny',)), + lp.GlobalArg("k_z_1, k_z_2", self.fft.dtype, shape=('Nz',)), + ] + + from pystella.field import Field + fk = Field('fk') + pd = tuple(Field(pdi) for pdi in ('pdx_k', 'pdy_k', 'pdz_k')) + + indices = fk.indices + + from pymbolic import var + mom_vars = tuple(var(name+'_1') for name in k_names) + + pdx, pdy, pdz = \ + ({pdi: kk_i[indices[i]] * 1j * fk * (1/grid_size)} + for i, (pdi, kk_i) in enumerate(zip(pd, mom_vars))) + + mom_vars = tuple(var(name+'_2') for name in k_names) + kmag_sq = sum(kk_i[x_i]**2 for kk_i, x_i in zip(mom_vars, indices)) + lap = {Field('lap_k'): - kmag_sq * fk * (1/grid_size)} + + from pystella.elementwise import ElementWiseMap + options = lp.Options(return_dict=True) + self.pdx_knl = ElementWiseMap(pdx, args=args, h=0, options=options) + self.pdy_knl = ElementWiseMap(pdy, args=args, h=0, options=options) + self.pdz_knl = ElementWiseMap(pdz, args=args, h=0, options=options) + self.lap_knl = ElementWiseMap(lap, args=args, h=0, options=options) + self.grad_knl = ElementWiseMap({**pdx, **pdy, **pdz}, args=args, h=0, + options=options) + self.grad_lap_knl = ElementWiseMap({**pdx, **pdy, **pdz, **lap}, args=args, + h=0, options=options) + + self.pool = None + + def __call__(self, queue, fx, *, + lap=None, pdx=None, pdy=None, pdz=None, grd=None): + """ + Computes requested derivatives of the input ``fx``. + Provides the same interface as + :meth:`pystella.GradientLaplacian.__call__`. 
+ """ + + if self.pool is None: + import pyopencl.tools as clt + self.pool = clt.MemoryPool(clt.ImmediateAllocator(queue)) + + from itertools import product + slices = list(product(*[range(n) for n in fx.shape[:-3]])) + + if grd is not None: + pdx = grd[..., 0, :, :, :] + pdy = grd[..., 1, :, :, :] + pdz = grd[..., 2, :, :, :] + + for s in slices: + fk = self.fft.dft(fx[s]) + if (lap is not None and pdx is not None + and pdy is not None and pdz is not None): + evt, out = \ + self.grad_lap_knl(queue, fk=fk, **self.momenta, + allocator=self.pool) + self.fft.idft(out['lap_k'], lap[s]) + self.fft.idft(out['pdx_k'], pdx[s]) + self.fft.idft(out['pdy_k'], pdy[s]) + self.fft.idft(out['pdz_k'], pdz[s]) + elif pdx is not None and pdy is not None and pdz is not None: + evt, out = self.grad_knl(queue, fk=fk, **self.momenta, + allocator=self.pool, filter_args=True) + self.fft.idft(out['pdx_k'], pdx[s]) + self.fft.idft(out['pdy_k'], pdy[s]) + self.fft.idft(out['pdz_k'], pdz[s]) + elif lap is not None: + evt, out = self.lap_knl(queue, fk=fk, **self.momenta, + allocator=self.pool, filter_args=True) + self.fft.idft(out['lap_k'], lap[s]) + elif pdx is not None: + evt, out = self.pdx_knl(queue, fk=fk, **self.momenta, + allocator=self.pool, filter_args=True) + self.fft.idft(out['pdx_k'], pdx[s]) + elif pdy is not None: + evt, out = self.pdy_knl(queue, fk=fk, **self.momenta, + allocator=self.pool, filter_args=True) + self.fft.idft(out['pdy_k'], pdy[s]) + elif pdz is not None: + evt, out = self.pdz_knl(queue, fk=fk, **self.momenta, + allocator=self.pool, filter_args=True) + self.fft.idft(out['pdz_k'], pdz[s]) + + return None diff --git a/pystella/fourier/dft.py b/pystella/fourier/dft.py new file mode 100644 index 0000000..ca4a1a9 --- /dev/null +++ b/pystella/fourier/dft.py @@ -0,0 +1,440 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated 
documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import pyopencl.array as cla + +__doc__ = """ +.. currentmodule:: pystella +.. autofunction:: DFT +.. currentmodule:: pystella.fourier +.. autoclass:: pystella.fourier.dft.BaseDFT +.. autoclass:: pDFT +.. autoclass:: gDFT +.. currentmodule:: pystella +""" + + +def DFT(decomp, context, queue, grid_shape, dtype, **kwargs): + """ + A wrapper to the creation of various FFT class options which determines + whether to use :class:`pystella.fourier.gDFT` (for single-GPU FFTs via + :mod:`gpyfft`) or :class:`pystella.fourier.pDFT` + (for distributed, CPU FFTs via :class:`mpi4py_fft.PFFT`), + based on the processor shape ``proc_shape`` and a flag ``use_gpu``. + + :arg decomp: A :class:`DomainDecomposition`. + + :arg context: A :class:`pyopencl.Context`. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg grid_shape: A 3-:class:`tuple` specifying the shape of position-space arrays + to be transformed. + + :arg dtype: The datatype of real arrays to be transformed. The complex + datatype is chosen to have the same precision. 
+ + The following keyword-only arguments are recognized: + + :arg use_gpu: A :class:`bool` dictating whether to use + :class:`pystella.fourier.gDFT`. + Defaults to *True*, i.e., this flag must be set to *False* to override the + default choice to use :class:`pystella.fourier.gDFT` on a single rank. + + Any remaining keyword arguments are passed to :class:`pystella.fourier.pDFT`, + should this function return such an object. + """ + + use_gpu = kwargs.pop('use_gpu', True) + proc_shape = decomp.proc_shape + if proc_shape == (1, 1, 1) and use_gpu: + return gDFT(decomp, context, queue, grid_shape, dtype) + else: + # local_shape = tuple(N//P for N, P in zip(grid_shape, proc_shape)) + # tmp = cla.zeros(queue, local_shape, dtype) + return pDFT(decomp, queue, grid_shape, proc_shape, dtype, **kwargs) + + +def _transfer_array(a, b): + # set a = b + if isinstance(a, np.ndarray) and isinstance(b, cla.Array): + b.get(ary=a) + elif isinstance(a, cla.Array) and isinstance(b, np.ndarray): + a.set(b) + return a + + +class BaseDFT: + """ + Base class for all FFT options. + + .. automethod:: shape + .. automethod:: dft + .. automethod:: idft + .. automethod:: zero_corner_modes + """ + + # pylint: disable=no-member + def shape(self, forward_output=True): + """ + :arg forward_output: A :class:`bool` specifying whether to output the + shape for the result of the forward Fourier transform. + + :returns: A 3-:class:`tuple` of the (per--MPI-rank) shape of the requested + array (as specified by ``forward_output``). + """ + + raise NotImplementedError + + def forward_transform(self, fx, fk, **kwargs): + raise NotImplementedError + + def backward_transform(self, fk, fx, **kwargs): + raise NotImplementedError + + def dft(self, fx=None, fk=None, **kwargs): + """ + Computes the forward Fourier transform. + + :arg fx: The array to be transformed. 
+ Can be a :class:`pyopencl.array.Array` with or without halo padding + (which will be removed by + :meth:`pystella.DomainDecomposition.remove_halos` + if needed) or a :class:`numpy.ndarray` without halo padding. + Arrays are copied as necessary. + Defaults to *None*, in which case :attr:`fx` (attached + to the transform) is used. + + :arg fk: The array in which to output the result of the transform. + Can be a :class:`pyopencl.array.Array` or a :class:`numpy.ndarray`. + Arrays are copied as necessary. + Defaults to *None*, in which case :attr:`fk` (attached + to the transform) is used. + + :returns: The forward Fourier transform of ``fx``. + Either ``fk`` if supplied or :attr:`fk`. + + Any remaining keyword arguments are passed to :meth:`forward_transform`. + + .. note:: + If you need the result of multiple Fourier transforms, you must + either supply an ``fk`` array or copy the output. + Namely, without passing ``fk`` the same memory (attached to the + transform object) will be used as output, overwriting any prior + results. + """ + + if fx is not None: + if fx.shape != self.shape(False): + if isinstance(fx, cla.Array): + queue = fx.queue + elif isinstance(self.fx, cla.Array): + queue = self.fx.queue + else: + queue = None + self.decomp.remove_halos(queue, fx, self.fx) + _fx = self.fx + elif not isinstance(fx, type(self.fx)): + _fx = _transfer_array(self.fx, fx) + else: + _fx = fx + else: + _fx = self.fx + + if fk is not None: + if not isinstance(fk, type(self.fk)): + _fk = self.fk + else: + _fk = fk + else: + _fk = self.fk + + _fk = self.forward_transform(_fx, _fk, **kwargs) + + if fk is not None: + if not isinstance(fk, type(self.fk)): + _fk = _transfer_array(fk, _fk) + else: + _fk = fk + else: + _fk = _fk + + return _fk + + def idft(self, fk=None, fx=None, **kwargs): + """ + Computes the backward Fourier transform. + + :arg fk: The array to be transformed. + Can be a :class:`pyopencl.array.Array` or a :class:`numpy.ndarray`. + Arrays are copied as necessary. 
+ Defaults to *None*, in which case :attr:`fk` (attached + to the transform) is used. + + :arg fx: The array in which to output the result of the transform. + Can be a :class:`pyopencl.array.Array` with or without halo padding + (which will be restored by + :meth:`pystella.DomainDecomposition.restore_halos` + if needed) or a :class:`numpy.ndarray` without halo padding. + Arrays are copied as necessary. + Defaults to *None*, in which case :attr:`fx` (attached + to the transform) is used. + + :returns: The backward Fourier transform of ``fk``. + Either ``fx`` if supplied or :attr:`fx`. + + Any remaining keyword arguments are passed to :meth:`backward_transform`. + + .. note:: + If you need the result of multiple Fourier transforms, you must + either supply an ``fx`` array or copy the output. + Namely, without passing ``fx`` the same memory (attached to the + transform object) will be used as output, overwriting any prior + results. + """ + + if fk is not None: + if not isinstance(fk, type(self.fk)): + _fk = _transfer_array(self.fk, fk) + else: + _fk = fk + else: + _fk = self.fk + + if fx is not None: + if fx.shape == self.shape(False) and isinstance(fx, type(self.fx)): + _fx = fx + else: + _fx = self.fx + else: + _fx = self.fx + + _fx = self.backward_transform(_fk, _fx, **kwargs) + + if fx is not None: + if fx.shape != self.shape(False): + if isinstance(fx, cla.Array): + queue = fx.queue + elif isinstance(self.fx, cla.Array): + queue = self.fx.queue + else: + queue = None + self.decomp.restore_halos(queue, _fx, fx) + _fx = fx + elif not isinstance(fx, type(self.fx)): + _fx = _transfer_array(fx, _fx) + else: + _fx = _fx + else: + _fx = _fx + + return _fx + + def zero_corner_modes(self, array, only_imag=False): + """ + Zeros the "corner" modes (modes where each component of its + integral wavenumber is either zero or the Nyquist along + that axis) of ``array`` (or just the imaginary part). + + :arg array: The array to operate on.
+ May be a :class:`pyopencl.array.Array` or a :class:`numpy.ndarray`. + + :arg only_imag: A :class:`bool` determining whether to only + set the imaginary part of the array to zero. + Defaults to *False*, i.e., setting the mode to ``0+0j``. + """ + + sub_k = list(x.get().astype('int') for x in self.sub_k.values()) + shape = self.grid_shape + + where_to_zero = [] + for mu in range(3): + kk = sub_k[mu] + where_0 = np.argwhere(abs(kk) == 0).reshape(-1) + where_N2 = np.argwhere(abs(kk) == shape[mu]//2).reshape(-1) + where_to_zero.append(np.concatenate([where_0, where_N2])) + + from itertools import product + for i, j, k in product(*where_to_zero): + if only_imag: + array[i, j, k] = array[i, j, k].real + else: + array[i, j, k] = 0. + + return array + + +_c_dtype_mapping = {'float32': 'complex64', 'float64': 'complex128', + np.float32: 'complex64', np.float64: 'complex128'} + + +class pDFT(BaseDFT): + """ + A wrapper to :class:`mpi4py_fft.PFFT` to compute distributed Fast Fourier + transforms. + + See :class:`pystella.fourier.dft.BaseDFT`. + + .. automethod:: __init__ + """ + + def __init__(self, decomp, queue, grid_shape, proc_shape, dtype, **kwargs): + """ + :arg decomp: A :class:`pystella.DomainDecomposition`. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg grid_shape: A 3-:class:`tuple` specifying the shape of position-space + arrays to be transformed. + + :arg proc_shape: A 3-:class:`tuple` specifying the shape of the MPI + processor grid. + + :arg dtype: The datatype of real arrays to be transformed. The complex + datatype is chosen to have the same precision. + + Any keyword arguments are passed to :meth:`mpi4py_fft.PFFT.__init__()`. 
+ """ + + self.decomp = decomp + self.grid_shape = grid_shape + self.proc_shape = proc_shape + self.dtype = dtype + cdtype = _c_dtype_mapping[dtype] + self.cdtype = cdtype + + if proc_shape[0] > 1 and proc_shape[1] == 1: + slab = True + else: + slab = False + + from mpi4py_fft.pencil import Subcomm + default_kwargs = dict( + # FIXME: this is weird + axes=([0], [1], [2]), threads=16, backend='fftw', collapse=True, + ) + default_kwargs.update(kwargs) + comm = decomp.comm if slab else Subcomm(decomp.comm, proc_shape) + + from mpi4py_fft import PFFT + self.fft = PFFT(comm, grid_shape, dtype=dtype, slab=slab, **default_kwargs) + + for transform in self.fft.xfftn: + transform.M = 1 # ensure normalization is not applied + + self.fx = self.fft.forward.input_array + self.fk = self.fft.forward.output_array + + from numpy.fft import fftfreq + k = [fftfreq(n, 1/n).astype(dtype) for n in grid_shape] + + if dtype in ('float32', 'float64', np.float32, np.float64): + from numpy.fft import rfftfreq + k[-1] = rfftfreq(grid_shape[-1], 1/grid_shape[-1]).astype(dtype) + + slc = self.fft.local_slice(True) + names = ('momenta_x', 'momenta_y', 'momenta_z') + self.sub_k = {direction: cla.to_device(queue, k_i[s_i]) + for direction, k_i, s_i in zip(names, k, slc)} + + def shape(self, forward_output=True): + return self.fft.shape(forward_output=forward_output) + + def forward_transform(self, fx, fk, **kwargs): + return self.fft.forward(input_array=fx, output_array=fk, **kwargs) + + def backward_transform(self, fk, fx, **kwargs): + return self.fft.backward(input_array=fk, output_array=fx, **kwargs) + + +class gDFT(BaseDFT): + """ + A wrapper to :mod:`gpyfft` to compute real-to-complex and complex-to-real + Fast Fourier transforms. + + See :class:`pystella.fourier.dft.BaseDFT`. + + .. automethod:: __init__ + """ + + def __init__(self, decomp, context, queue, grid_shape, dtype): + """ + :arg decomp: A :class:`pystella.DomainDecomposition`. + + :arg context: A :class:`pyopencl.Context`. 
+ + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg grid_shape: A 3-:class:`tuple` specifying the shape of position-space + arrays to be transformed. + + :arg dtype: The datatype of real arrays to be transformed. The complex + datatype is chosen to have the same precision. + """ + + self.decomp = decomp + self.grid_shape = grid_shape + self.dtype = dtype + cdtype = _c_dtype_mapping[dtype] + self.cdtype = cdtype + + self.fx = cla.zeros(queue, grid_shape, dtype) + self.fk = cla.zeros(queue, self.shape(True), cdtype) + from gpyfft import FFT + self.forward = FFT(context, queue, self.fx, out_array=self.fk, real=True, + scale_forward=1, scale_backward=1) + self.backward = FFT(context, queue, self.fk, out_array=self.fx, real=True, + scale_forward=1, scale_backward=1) + + from numpy.fft import fftfreq, rfftfreq + names = ('momenta_x', 'momenta_y', 'momenta_z') + + slc = ((), (), (),) + k = [fftfreq(n, 1/n).astype(dtype) for n in grid_shape] + self.sub_k_c = {direction: cla.to_device(queue, k_i[s_i]) + for direction, k_i, s_i in zip(names, k, slc)} + + k[-1] = rfftfreq(grid_shape[-1], 1/grid_shape[-1]).astype(dtype) + self.sub_k = {direction: cla.to_device(queue, k_i[s_i]) + for direction, k_i, s_i in zip(names, k, slc)} + + def shape(self, forward_output=True): + if forward_output: + shape = list(self.grid_shape) + shape[-1] = shape[-1]//2+1 + return tuple(shape) + else: + return self.grid_shape + + def forward_transform(self, fx, fk, **kwargs): + event, = self.forward.enqueue_arrays(data=fx, result=fk, forward=True) + fx.add_event(event) + fk.add_event(event) + return fk + + def backward_transform(self, fk, fx, **kwargs): + event, = self.backward.enqueue_arrays(data=fk, result=fx, forward=False) + fx.add_event(event) + fk.add_event(event) + return fx diff --git a/pystella/fourier/projectors.py b/pystella/fourier/projectors.py new file mode 100644 index 0000000..a5d7600 --- /dev/null +++ b/pystella/fourier/projectors.py @@ -0,0 +1,343 @@ +__copyright__ = "Copyright 
(C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import pyopencl.array as cla +import loopy as lp + +__doc__ = """ +.. currentmodule:: pystella +.. autoclass:: Projector +""" + + +class Projector: + """ + Constructs kernels to project vectors to and from their polarization basis + and to project out longitudinal modes, and to project a tensor field to its + transverse and traceless component. + + .. automethod:: __init__ + .. automethod:: transversify + .. automethod:: pol_to_vec + .. automethod:: vec_to_pol + ..
automethod:: transverse_traceless + """ + + def get_pol_to_vec_knl(self): + return lp.make_kernel( + "[Nx, Ny, Nz] -> \ + { [i,j,k,mu]: 0<=i eps[mu] = 0 + end + <> kx = eff_mom_x[i] + <> ky = eff_mom_y[j] + <> kz = eff_mom_z[k] + + if fabs(kx) < 1.e-10 and fabs(ky) < 1.e-10 + if fabs(kz) > 1.e-10 + eps[0] = 1 / sqrt2 + eps[1] = 1j / sqrt2 + end + else + <> Kappa = sqrt(kx**2 + ky**2) + <> kmag = sqrt(kx**2 + ky**2 + kz**2) + + eps[0] = (kx * kz / kmag - 1j * ky) / Kappa / sqrt2 + eps[1] = (ky * kz / kmag + 1j * kx) / Kappa / sqrt2 + eps[2] = - Kappa / kmag / sqrt2 + end + + vector[mu, i, j, k] = eps[mu] * plus[i, j, k] \ + + conj(eps[mu]) * minus[i, j, k] {dup=mu} + end + + """, + seq_dependencies=True, + default_offset=lp.auto, + lang_version=(2018, 2), + ) + + def get_vec_to_pol_knl(self): + return lp.make_kernel( + "[Nx, Ny, Nz] -> \ + { [i,j,k,mu]: 0<=i eps[mu] = 0 + end + <> kx = eff_mom_x[i] + <> ky = eff_mom_y[j] + <> kz = eff_mom_z[k] + + if fabs(kx) < 1.e-10 and fabs(ky) < 1.e-10 + if fabs(kz) > 1.e-10 + eps[0] = 1 / sqrt2 + eps[1] = 1j / sqrt2 + end + else + <> Kappa = sqrt(kx**2 + ky**2) + <> kmag = sqrt(kx**2 + ky**2 + kz**2) + + eps[0] = (kx * kz / kmag - 1j * ky) / Kappa / sqrt2 + eps[1] = (ky * kz / kmag + 1j * kx) / Kappa / sqrt2 + eps[2] = - Kappa / kmag / sqrt2 + end + + plus[i, j, k] = sum(mu, conj(eps[mu]) * vector[mu, i, j, k]) {dup=mu} + minus[i, j, k] = sum(mu, eps[mu] * vector[mu, i, j, k]) {dup=mu} + end + """, + seq_dependencies=True, + default_offset=lp.auto, + lang_version=(2018, 2), + ) + + def get_transversify_knl(self): + return lp.make_kernel( + "[Nx, Ny, Nz] -> \ + { [i,j,k,mu]: 0<=i kvec[0] = eff_mom_x[i] + kvec[1] = eff_mom_y[j] + kvec[2] = eff_mom_z[k] + if fabs(kvec[0]) < 1.e-14 \ + and fabs(kvec[1]) < 1.e-14 \ + and fabs(kvec[2]) < 1.e-14 + vectorT[mu, i, j, k] = 0 + else + <> kmag = sqrt(sum(mu, kvec[mu]**2)) {dup=mu} + <> div = sum(mu, kvec[mu] * vector[mu, i, j, k]) {dup=mu} + + vectorT[mu, i, j, k] = vector[mu, i, j, k] \ + 
- kvec[mu] / kmag**2 * div {dup=mu,nosync=*} + end + end + """, + seq_dependencies=True, + default_offset=lp.auto, + lang_version=(2018, 2), + ) + + def get_tt_knl(self): + knl = lp.make_kernel( + "[Nx, Ny, Nz] -> \ + { [i,j,k,a,b,c,d]: \ + 0<=i kvec[0] = eff_mom_x[i] + kvec[1] = eff_mom_y[j] + kvec[2] = eff_mom_z[k] + <> kmag = sqrt(kvec[0]**2 + kvec[1]**2 + kvec[2]**2) + kvec[0] = kvec[0] / kmag + kvec[1] = kvec[1] / kmag + kvec[2] = kvec[2] / kmag + + id(a, b) := ((7 - if(a <= b, a, b)) * if(a <= b, a, b)) // 2 \ + - 4 + if(a <= b, b, a) + P(a, b) := if(a == b, 1, 0) - kvec[a-1] * kvec[b-1] + + for a, b + if a <= b + hTT[id(a, b)] = sum((c, d), \ + (P(a, c) * P(d, b) \ + - .5 * P(a, b) * P(c, d)) \ + * hij[id(c, d), i, j, k]) + end + end + + for a, b + if a <= b + hijTT[id(a, b), i, j, k] = hTT[id(a, b)] {dup=a,dup=b} + end + end + end + """, + [ + lp.GlobalArg('hij', shape='(6, Nx, Ny, Nz)'), + lp.GlobalArg('hijTT', shape='(6, Nx, Ny, Nz)'), + lp.TemporaryVariable('hTT', shape='(6,)'), + '...' + ], + seq_dependencies=True, + default_offset=lp.auto, + lang_version=(2018, 2), + ) + return lp.expand_subst(knl) + + def __init__(self, fft, effective_k): + """ + :arg fft: An FFT object as returned by :func:`DFT`. + ``grid_shape`` and ``dtype`` are determined by ``fft``'s attributes. + + :arg effective_k: A :class:`callable` with signature ``(k, dx)`` returning + the effective momentum (eigenvalue) of the corresponding stencil. + That is, projections are implemented relative to the stencil + whose eigenvalues are returned by this function. 
+ """ + + self.fft = fft + + if not callable(effective_k): + if effective_k != 0: + from pystella.derivs import FirstCenteredDifference + h = effective_k + effective_k = FirstCenteredDifference(h).get_eigenvalues + else: + def effective_k(k, dx): # pylint: disable=function-redefined + return k + + from math import pi + grid_shape = fft.grid_shape + # since projectors only need the unit momentum vectors, can pass + # k = k_hat * dk * dx = k_hat * 2 * pi * grid_shape and dx = 1, + # where k_hat is the integer momentum gridpoint + dk_dx = tuple(2 * pi / Ni for Ni in grid_shape) + + queue = self.fft.sub_k['momenta_x'].queue + sub_k = list(x.get().astype('int') for x in self.fft.sub_k.values()) + eff_mom_names = ('eff_mom_x', 'eff_mom_y', 'eff_mom_z') + self.eff_mom = {} + for mu, (name, kk) in enumerate(zip(eff_mom_names, sub_k)): + eff_k = effective_k(kk.astype(fft.dtype) * dk_dx[mu], 1) + eff_k[abs(sub_k[mu]) == fft.grid_shape[mu]//2] = 0. + eff_k[sub_k[mu] == 0] = 0. + self.eff_mom[name] = cla.to_device(queue, eff_k) + + def process(knl): + knl = lp.fix_parameters(knl, sqrt2=2**.5) + knl = lp.split_iname(knl, "k", 32, outer_tag="g.0", inner_tag="l.0") + knl = lp.split_iname(knl, "j", 1, outer_tag="g.1", inner_tag="unr") + knl = lp.split_iname(knl, "i", 1, outer_tag="g.2", inner_tag="unr") + knl = lp.set_options(knl, enforce_variable_access_ordered="no_check") + return knl + + self.pol_to_vec_knl = process(self.get_pol_to_vec_knl()) + self.vec_to_pol_knl = process(self.get_vec_to_pol_knl()) + self.transversify_knl = process(self.get_transversify_knl()) + self.tt_knl = process(self.get_tt_knl()) + + def transversify(self, queue, vector, vector_T=None): + """ + Projects out longitudinal modes of a vector field. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg vector: The array containing the + momentum-space vector field to be projected. + Must have shape ``(3,)+k_shape``, where + ``k_shape`` is the shape of a single momentum-space field array. 
+ + :arg vector_T: The array in which the resulting + projected vector field will be stored. + Must have the same shape as ``vector``. + Defaults to *None*, in which case the projection is performed in-place. + + :returns: The :class:`pyopencl.Event` associated with the kernel invocation. + """ + + vector_T = vector_T or vector + evt, _ = self.transversify_knl(queue, **self.eff_mom, + vector=vector, vectorT=vector) + return evt + + def pol_to_vec(self, queue, plus, minus, vector): + """ + Projects the plus and minus polarizations of a vector field onto the + vector components. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg plus: The array containing the + momentum-space field of the plus polarization. + + :arg minus: The array containing the + momentum-space field of the minus polarization. + + :arg vector: The array into which the vector + field will be stored. + Must have shape ``(3,)+k_shape``, where ``k_shape`` is the shape of a + single momentum-space field array. + + :returns: The :class:`pyopencl.Event` associated with the kernel invocation. + """ + + evt, _ = self.pol_to_vec_knl(queue, **self.eff_mom, + vector=vector, plus=plus, minus=minus) + return evt + + def vec_to_pol(self, queue, plus, minus, vector): + """ + Projects the components of a vector field onto the basis of plus and + minus polarizations. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg plus: The array into which will be stored the + momentum-space field of the plus polarization. + + :arg minus: The array into which will be stored the + momentum-space field of the minus polarization. + + :arg vector: The array whose polarization + components will be computed. + Must have shape ``(3,)+k_shape``, where ``k_shape`` is the shape of a + single momentum-space field array. + + :returns: The :class:`pyopencl.Event` associated with the kernel invocation.
+ """ + + evt, _ = self.vec_to_pol_knl(queue, **self.eff_mom, + vector=vector, plus=plus, minus=minus) + return evt + + def transverse_traceless(self, queue, hij, hij_TT=None): + """ + Projects a tensor field to be transverse and traceless. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg hij: The array containing the + momentum-space tensor field to be projected. + Must have shape ``(6,)+k_shape``, where + ``k_shape`` is the shape of a single momentum-space field array. + + :arg hij_TT: The array in wihch the resulting projected + tensor field will be stored. + Must have the same shape as ``hij``. + Defaults to *None*, in which case the projection is performed in-place. + + :returns: The :class:`pyopencl.Event` associated with the kernel invocation. + """ + + hij_TT = hij_TT or hij + evt, _ = self.tt_knl(queue, hij=hij, hijTT=hij_TT, **self.eff_mom) + + # re-set to zero + for mu in range(6): + self.fft.zero_corner_modes(hij_TT[mu]) diff --git a/pystella/fourier/rayleigh.py b/pystella/fourier/rayleigh.py new file mode 100644 index 0000000..c3b4215 --- /dev/null +++ b/pystella/fourier/rayleigh.py @@ -0,0 +1,395 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import pyopencl.array as cla +import pyopencl.clrandom as clr +import loopy as lp + +__doc__ = """ +.. currentmodule:: pystella +.. autoclass:: RayleighGenerator +""" + + +def make_hermitian(fk): + grid_shape = list(fk.shape) + grid_shape[-1] = 2 * (grid_shape[-1] - 1) + pos = [np.arange(0, Ni//2+1) for Ni in grid_shape] + neg = [np.concatenate([np.array([0]), np.arange(Ni-1, Ni//2-1, -1)]) + for Ni in grid_shape] + + for k in [0, grid_shape[-1]//2]: + for n, p in zip(neg[0], pos[0]): + fk[n, neg[1], k] = np.conj(fk[p, pos[1], k]) + fk[p, neg[1], k] = np.conj(fk[n, pos[1], k]) + for n, p in zip(neg[1], pos[1]): + fk[neg[0], n, k] = np.conj(fk[pos[0], p, k]) + fk[neg[0], p, k] = np.conj(fk[pos[0], n, k]) + + for i in [0, grid_shape[0]//2]: + for j in [0, grid_shape[1]//2]: + for k in [0, grid_shape[2]//2]: + fk[i, j, k] = np.real(fk[i, j, k]) + return fk + + +class RayleighGenerator: + """ + Constructs kernels to generate Gaussian-random fields with a chosen power + spectrum in Fourier space by drawing modes according to the corresponding + Rayleigh distribution. + + .. automethod:: __init__ + .. automethod:: generate + .. automethod:: init_field + .. automethod:: init_transverse_vector + .. automethod:: init_vector_from_pol + + In addition, the following methods apply the WKB approximation to + initialize a field and its (conformal-) time derivative in FLRW spacetime. + + .. automethod:: generate_WKB + .. automethod:: init_WKB_fields + """ + + def get_wkb_knl(self): + knl = lp.make_kernel( + "[Nx, Ny, Nz] -> { [i,j,k]: 0<=i amp_1 = sqrt(- log(rands[0, i, j, k])) + <> amp_2 = sqrt(- log(rands[2, i, j, k])) + <> phs_1 = exp(1j * 2. 
* pi * rands[1, i, j, k]) + <> phs_2 = exp(1j * 2. * pi * rands[3, i, j, k]) + <> power = f_power[i, j, k] + <> Lmode = phs_1 * amp_1 * sqrt(power) + <> Rmode = phs_2 * amp_2 * sqrt(power) + <> fk_ = (Lmode + Rmode) / sqrt2 + fk[i, j, k] = fk_ + dfk[i, j, k] = 1j * wk[i, j, k] * (Lmode - Rmode) / sqrt2 - hubble * fk_ + """, + [ + lp.ValueArg("hubble", self.dtype), + lp.GlobalArg('fk, dfk', shape=lp.auto, dtype=self.cdtype), + "..." + ], + seq_dependencies=True, + silenced_warnings=['inferred_iname'], + lang_version=(2018, 2), + ) + knl = lp.set_options(knl, return_dict=True) + return knl + + def get_non_wkb_knl(self): + knl = lp.make_kernel( + "[Nx, Ny, Nz] -> { [i,j,k]: 0<=i amp = sqrt(- log(rands[0, i, j, k])) + <> phs = exp(1j * 2. * pi * rands[1, i, j, k]) + fk[i, j, k] = phs * amp * sqrt(f_power[i, j, k]) + """, + [lp.GlobalArg('fk', shape=lp.auto, dtype=self.cdtype), "..."], + seq_dependencies=True, + lang_version=(2018, 2), + ) + return knl + + def __init__(self, context, fft, dk, volume, **kwargs): + """ + :arg context: A :class:`pyopencl.Context`. + + :arg fft: An FFT object as returned by :func:`DFT`. + + :arg dk: A 3-:class:`tuple` of the momentum-space grid spacing of each + axis (i.e., the infrared cutoff of the grid in each direction). + + :arg volume: The physical volume of the grid. + + The following keyword-only arguments are recognized: + + :arg seed: The seed to the random number generator. + Defaults to ``13298``. 
+ """ + + self.fft = fft + self.dtype = fft.dtype + self.cdtype = fft.cdtype + self.volume = volume + + sub_k = list(x.get() for x in self.fft.sub_k.values()) + kvecs = np.meshgrid(*sub_k, indexing='ij', sparse=False) + self.rkmags = np.sqrt(sum((dki * ki)**2 for dki, ki in zip(dk, kvecs))) + + seed = kwargs.pop('seed', 13298) + self.rng = clr.ThreefryGenerator(context, seed=seed) + + def parallelize(knl): + knl = lp.fix_parameters(knl, pi=np.pi, sqrt2=np.sqrt(2.)) + knl = lp.split_iname(knl, 'k', 32, inner_tag='l.0', outer_tag='g.0') + knl = lp.split_iname(knl, 'j', 1, inner_tag='unr', outer_tag='g.1') + knl = lp.split_iname(knl, 'i', 1, inner_tag='unr', outer_tag='g.2') + return knl + + self.wkb_knl = parallelize(self.get_wkb_knl()) + self.non_wkb_knl = parallelize(self.get_non_wkb_knl()) + + def post_process(self, fk, is_real): + from pystella.fourier import gDFT + if is_real and isinstance(self.fft, gDFT): + # real fields must be Hermitian-symmetric, and it seems we + # need to do this manually when FFT'ing with gpyfft + fk = make_hermitian(fk) + # can at least do this in general + self.fft.zero_corner_modes(fk, only_imag=True) + return fk + + # wrapper to remove 1/0 and set homogeneous power to zero + def _ps_wrapper(self, ps_func, wk, kmags): + if kmags[0, 0, 0] == 0.: + wk0 = wk[0, 0, 0] + wk[0, 0, 0] = 1. + power = ps_func(wk) + if kmags[0, 0, 0] == 0.: + power[0, 0, 0] = 0. + wk[0, 0, 0] = wk0 + return power + + def generate(self, queue, random=True, field_ps=lambda kmag: 1/2/kmag, + norm=1, is_real=True, window=lambda kmag: 1.): + """ + Generate a 3-D array of Fourier modes with a given power spectrum and + random phases. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg random: Whether to randomly sample the Rayleigh distribution + of mode amplitudes. + Defaults to *True*. + + :arg field_ps: A :class:`callable` returning the desired + power spectrum of the field as a function of momentum ``kmag``. 
+ Defaults to the Bunch-Davies vacuum, + ``lambda kmag: 1/2/kmag``. + + :arg norm: A constant normalization factor by which to multiply all + power spectra. + Defaults to ``1``. + + :arg is_real: Whether the fields to be generated are real or complex + (in position space). + Defaults to *True*. + + .. note:: + + Currently, only ``is_real=True`` is supported. + + :arg window: A :class:`callable` window function filtering initial mode + amplitudes. + Defaults to ``lambda kmag: 1``, i.e., no filter. + + :returns: An :class:`numpy.ndarray` containing the generated Fourier modes + of the field. + """ + + amplitude_sq = norm / self.volume + kmags = self.rkmags # if is_real else self.ckmags + + rands = self.rng.uniform(queue, (2,)+kmags.shape, self.dtype) + if not random: + rands[0] = np.exp(-1) + + f_power = (amplitude_sq * window(kmags)**2 + * self._ps_wrapper(field_ps, kmags, kmags)) + + evt, (fk,) = self.non_wkb_knl(queue, rands=rands, f_power=f_power, + out_host=True) + + return self.post_process(fk, is_real) + + def init_field(self, fx, queue=None, **kwargs): + """ + A wrapper which calls :meth:`generate` to initialize a field + in Fourier space and returns its inverse Fourier transform. + + :arg fx: The array in which the field will be stored. + + The following keyword arguments are recognized: + + :arg queue: A :class:`pyopencl.CommandQueue`. + Defaults to ``fx.queue``. + + Any additional keyword arguments are passed to :meth:`generate`. + """ + + queue = queue or fx.queue + fk = self.generate(queue, **kwargs) + self.fft.idft(fk, fx) + + def init_transverse_vector(self, projector, vector, queue=None, **kwargs): + """ + A wrapper which calls :meth:`generate` to initialize a transverse + three-vector field in Fourier space and returns its inverse Fourier + transform. + Each component will have the same power spectrum. + + :arg projector: A :class:`Projector` used to project out + longitudinal components of the vector field. 
+ + :arg vector: The array in which the vector field will be stored. + + The following keyword arguments are recognized: + + :arg queue: A :class:`pyopencl.CommandQueue`. + Defaults to ``vector.queue``. + + Any additional keyword arguments are passed to :meth:`generate`. + """ + + queue = queue or vector.queue + + vector_k = cla.empty(queue, (3,)+self.fft.shape(True), self.cdtype) + + for mu in range(3): + fk = self.generate(queue, **kwargs) + vector_k[mu].set(fk) + + projector.transversify(queue, vector_k) + + for mu in range(3): + self.fft.idft(vector_k[mu], vector[mu]) + + def init_vector_from_pol(self, projector, vector, plus_ps, minus_ps, + queue=None, **kwargs): + """ + A wrapper which calls :meth:`generate` to initialize a transverse + three-vector field in Fourier space and returns its inverse Fourier + transform. + In contrast to :meth:`init_transverse_vector`, modes are generated + for the plus and minus polarizations of the vector field, from which + the vector field itself is constructed. + + :arg projector: A :class:`Projector` used to project out + longitudinal components of the vector field. + + :arg vector: The array in which the vector field will be stored. + + :arg plus_ps: A :class:`callable` returning the power spectrum of the + plus polarization as a function of momentum ``kmag``. + + :arg minus_ps: A :class:`callable` returning the power spectrum of the + minus polarization as a function of momentum ``kmag``. + + The following keyword arguments are recognized: + + :arg queue: A :class:`pyopencl.CommandQueue`. + Defaults to ``vector.queue``. + + Any additional keyword arguments are passed to :meth:`generate`. 
+ """ + + queue = queue or vector.queue + + fk = self.generate(queue, field_ps=plus_ps, **kwargs) + plus_k = cla.to_device(queue, fk) + + fk = self.generate(queue, field_ps=minus_ps, **kwargs) + minus_k = cla.to_device(queue, fk) + + vector_k = cla.empty(queue, (3,)+self.fft.shape(True), self.cdtype) + projector.pol_to_vec(queue, plus_k, minus_k, vector_k) + + for mu in range(3): + self.fft.idft(vector_k[mu], vector[mu]) + + def generate_WKB(self, queue, random=True, + field_ps=lambda wk: 1/2/wk, + norm=1, omega_k=lambda kmag: kmag, + hubble=0., is_real=True, window=lambda kmag: 1.): + """ + Generate a 3-D array of Fourier modes with a given power spectrum and + random phases, along with that of its time derivative + according to the WKB approximation (for Klein-Gordon fields in + conformal FLRW spacetime). + + Arguments match those of :meth:`generate`, with the following + exceptions/additions: + + :arg field_ps: A :class:`callable` returning the desired + power spectrum of the field as a function of :math:`\\omega(k)``. + Defaults to the Bunch-Davies vacuum, ``lambda wk: 1/2/wk``, + where ``wk=omega_k(kmag)``. + + :arg omega_k: A :class:`callable` defining the dispersion relation + of the field. + Defaults to ``lambda kmag: kmag``. + + :arg hubble: The value of the (conformal) Hubble parameter to use in + generating modes for the field's time derivative. + Only used when ``WKB=True``. + Defaults to ``0``. + + :returns: A tuple ``(fk, dfk)`` containing the generated Fourier modes + of the field and its time derivative. 
+ """ + + amplitude_sq = norm / self.volume + kmags = self.rkmags # if is_real else self.ckmags + kshape = kmags.shape + + rands = self.rng.uniform(queue, (4,)+kshape, self.dtype) + if not random: + rands[0] = rands[2] = np.exp(-1) + + wk = omega_k(kmags) + f_power = (amplitude_sq * window(kmags)**2 + * self._ps_wrapper(field_ps, wk, kmags)) + + evt, out = self.wkb_knl(queue, rands=rands, hubble=hubble, + wk=wk, f_power=f_power, out_host=True) + + fk = self.post_process(out['fk'], is_real) + dfk = self.post_process(out['dfk'], is_real) + + return fk, dfk + + def init_WKB_fields(self, fx, dfx, queue=None, **kwargs): + """ + A wrapper which calls :meth:`generate_WKB` to initialize a field and + its dime derivative in Fourier space and inverse Fourier transform + the results. + + :arg fx: The array in which the field will be stored. + + :arg dfx: The array in which the field's time derivative will + be stored. + + :arg queue: A :class:`pyopencl.CommandQueue`. + Defaults to ``fx.queue``. + + Any additional keyword arguments are passed to :meth:`generate_WKB`. 
+ """ + + queue = queue or fx.queue + fk, dfk = self.generate_WKB(queue, **kwargs) + self.fft.idft(fk, fx) + self.fft.idft(dfk, dfx) diff --git a/pystella/fourier/spectra.py b/pystella/fourier/spectra.py new file mode 100644 index 0000000..c19ac3a --- /dev/null +++ b/pystella/fourier/spectra.py @@ -0,0 +1,325 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import pyopencl.array as cla +import loopy as lp + +from warnings import filterwarnings +filterwarnings('ignore', category=lp.diagnostic.LoopyAdvisory, + message="could not find a conflict-free mem layout") +from pyopencl.characterize import CLCharacterizationWarning +filterwarnings('ignore', category=CLCharacterizationWarning) + + +class PowerSpectra: + """ + A class for computing power spectra of fields. + + .. automethod:: __init__ + .. automethod:: __call__ + .. automethod:: bin_power + .. automethod:: polarization + .. 
automethod:: gw + """ + + def __init__(self, decomp, fft, dk, volume, **kwargs): + """ + :arg decomp: A :class:`DomainDecomposition`. + + :arg fft: An FFT object as returned by :func:`DFT`. + + :arg dk: A 3-:class:`tuple` of the momentum-space grid spacing of each + axis (i.e., the infrared cutoff of the grid in each direction). + + :arg volume: The physical volume of the grid. + + The following keyword-only arguments are also recognized: + + :arg bin_with: A :class:`float` specifying the bin width to use. + Defaults to ``min(dk)``. + """ + + self.decomp = decomp + self.fft = fft + self.grid_shape = fft.grid_shape + self.proc_shape = decomp.proc_shape + + self.dtype = fft.dtype + self.cdtype = fft.cdtype + self.kshape = self.fft.shape(True) + + self.dk = dk + self.bin_width = kwargs.pop('bin_width', min(dk)) + + d3x = volume / np.product(self.grid_shape) + self.norm = (1 / 2 / np.pi**2 / volume) * d3x**2 + + sub_k = list(x.get() for x in self.fft.sub_k.values()) + kvecs = np.meshgrid(*sub_k, indexing='ij', sparse=False) + rkmags = np.sqrt(sum((dki * ki)**2 for dki, ki in zip(self.dk, kvecs))) + + counts = 2. * np.ones_like(rkmags) + counts[kvecs[2] == 0] = 1. + counts[kvecs[2] == self.grid_shape[-1]//2] = 1. 
+ + from mpi4py import MPI + max_k = self.decomp.allreduce(np.max(rkmags), MPI.MAX) + self.num_bins = int(max_k / self.bin_width + .5) + 1 + bins = np.arange(-.5, self.num_bins + .5) * self.bin_width + + sub_bin_counts = np.histogram(rkmags, weights=counts, bins=bins)[0] + self.bin_counts = self.decomp.allreduce(sub_bin_counts) + + self.real_spectra_knl = self.make_spectra_knl(True, self.kshape[-1]) + # FIXME: get complex Nz better + _Nz = self.grid_shape[-1] // self.proc_shape[1] + self.complex_spectra_knl = self.make_spectra_knl(False, _Nz) + + self.pool = None + + def make_spectra_knl(self, is_real, Nz): + knl = lp.make_kernel( + "[NZ, Nx, Ny, Nz, num_bins, is_real] -> \ + { [i,j,k,b]: 0<=i k_i = momenta_x[i] + <> k_j = momenta_y[j] + <> k_k = momenta_z[k] + <> kmag = sqrt((dki * k_i)**2 + (dkj * k_j)**2 + (dkk * k_k)**2) + bin = round(kmag / bin_width) + <> count = if(is_real and k_k > 0 and k_k < NZ/2, 2., 1.) + <> power = abs(fk[i, j, k])**2 * kmag**k_power * count + temp[bin] = temp[bin] + power {id=tmp, dep=init, atomic} + end + for b + spectrum[b] = spectrum[b] + temp[b] {id=glb, dep=tmp, atomic} + end + end + """, + [ + lp.GlobalArg("spectrum", self.dtype, shape=(self.num_bins,), + for_atomic=True), + lp.GlobalArg("momenta_x", self.dtype, shape=('Nx',)), + lp.GlobalArg("momenta_y", self.dtype, shape=('Ny',)), + lp.GlobalArg("momenta_z", self.dtype, shape=('Nz',)), + lp.TemporaryVariable("temp", self.dtype, shape=(self.num_bins,), + for_atomic=True, + address_space=lp.AddressSpace.LOCAL), + lp.ValueArg("k_power, bin_width, dki, dkj, dkk", self.dtype), + "..." + ], + default_offset=lp.auto, + silenced_warnings=['write_race(tmp)', 'write_race(glb)'], + seq_dependencies=True, + lang_version=(2018, 2), + ) + # FIXME: count incorrect for complex? 
+ + knl = lp.fix_parameters(knl, NZ=self.grid_shape[-1], num_bins=self.num_bins, + dki=self.dk[0], dkj=self.dk[1], dkk=self.dk[2], + Nz=Nz, is_real=is_real) + knl = lp.split_iname(knl, "k", Nz, outer_tag="g.0", inner_tag="l.0") + knl = lp.split_iname(knl, "b", min(1024, self.num_bins), + outer_tag="g.0", inner_tag="l.0") + knl = lp.tag_inames(knl, "j:g.1") + return knl + + def bin_power(self, fk, queue=None, k_power=3, is_real=True): + """ + Computes the binned power spectrum of a momentum-space field, + + .. math:: + + \\Delta_f^2(k) + = \\frac{1}{2 \\pi^2 V} \\int \\mathrm{d} \\Omega \\, + \\left\\vert \\mathbf{k} \\right\\vert^n + \\left\\vert f(\\mathbf{k}) \\right\\vert^2 + + where ``k_power`` specifies the value of :math:`n`. + + :arg fk: The array containing the complex-valued, + momentum-space field whose power spectrum is to be computed. + + The following keyword arguments are recognized: + + :arg queue: A :class:`pyopencl.CommandQueue`. + Defaults to ``fk.queue``. + + :arg k_power: The exponent :math:`n` to use on :math:`\\vert k \\vert`. + Defaults to 3 (to compute the "dimensionless" power spectrum). + """ + + queue = queue or fk.queue + + if self.pool is None: + import pyopencl.tools as clt + self.pool = clt.MemoryPool(clt.ImmediateAllocator(queue)) + + if is_real: + evt, (spectrum,) = \ + self.real_spectra_knl(queue, allocator=self.pool, fk=fk, + k_power=k_power, **self.fft.sub_k, + bin_width=self.bin_width) + else: + raise NotImplementedError('complex spectra, at least distributed') + evt, (spectrum,) = \ + self.complex_spectra_knl(queue, allocator=self.pool, fk=fk, + k_power=k_power, **self.fft.sub_k_c, + bin_width=self.bin_width) + + full_spectrum = self.decomp.allreduce(spectrum.get()) + return full_spectrum / self.bin_counts + + def __call__(self, fx, queue=None, k_power=3): + """ + Computes the power spectrum of the position-space field ``fx`` by first + Fourier transforming ``fx`` and then calling :meth:`bin_power`. 
+ + :arg fx: The array containing the position-space field + whose power spectrum is to be computed. + If ``fx`` has more than three axes, all the outer axes are looped over. + As an example, if ``f`` has shape ``(2, 3, 130, 130, 130)``, + this method loops over the outermost two axes with shape ``(2, 3)``, and + the resulting output data would have the shape ``(2, 3, num_bins)``. + + The following keyword arguments are recognized: + + :arg queue: A :class:`pyopencl.CommandQueue`. + Defaults to ``fx.queue``. + + :arg k_power: The exponent :math:`n` to use on :math:`\\vert k \\vert`. + Defaults to 3 (to compute the "dimensionless" power spectrum). + """ + + queue = queue or fx.queue + is_real = fx.dtype == np.float64 or fx.dtype == np.float32 + + outer_shape = fx.shape[:-3] + from itertools import product + slices = list(product(*[range(n) for n in outer_shape])) + + result = np.zeros(outer_shape+(self.num_bins,), self.dtype) + for s in slices: + fk = self.fft.dft(fx[s]) + result[s] = self.bin_power(fk, queue, k_power, is_real) + + return self.norm * result + + def polarization(self, vector, projector, queue=None, k_power=3): + """ + Computes the power spectra of the plus and minus polarizations of a vector + field. + + :arg vector: The array containing the position-space vector field + whose power spectrum is to be computed. + If ``vector`` has more than four axes, all the outer axes are + looped over. + As an example, if ``vector`` has shape ``(2, 3, 3, 130, 130, 130)`` + (where the fourth-to-last axis is the vector-component axis), + this method loops over the outermost two axes with shape ``(2, 3)``, and + the resulting output data would have the shape ``(2, 3, 2, num_bins)`` + (where the second-to-last axis is the polarization axis). + + :arg projector: A :class:`Projector`. + + The remaining arguments are the same as those to :meth:`__call__`. 
+ """ + + queue = queue or vector.queue + + vec_k = cla.empty(queue, (3,)+self.kshape, self.cdtype) + # overwrite vec_k + plus = vec_k[0] + minus = vec_k[1] + + outer_shape = vector.shape[:-4] + from itertools import product + slices = list(product(*[range(n) for n in outer_shape])) + + result = np.zeros(outer_shape+(2, self.num_bins,), self.dtype) + for s in slices: + for mu in range(3): + self.fft.dft(vector[s][mu], vec_k[mu]) + + projector.vec_to_pol(queue, plus, minus, vec_k) + result[s][0] = self.bin_power(plus, queue=queue, k_power=k_power) + result[s][1] = self.bin_power(minus, queue=queue, k_power=k_power) + + return self.norm * result + + def gw(self, hij, projector, hubble, queue=None, k_power=3): + """ + Computes the present, transverse-traceless gravitational wave power spectrum. + + .. math:: + + \\Delta_t^2(k) + = \\frac{1}{24 \\pi^{2} \\mathcal{H}^{2}} + \\frac{1}{V} + \\sum_{i, j} \\int \\mathrm{d} \\Omega \\, + \\left\\vert \\mathbf{k} \\right\\vert^3 + \\left\\vert h_{i j}^{\\prime}(k) \\right \\vert^{2} + + :arg hij: The array containing the + position-space tensor field whose power spectrum is to be computed. + Must be 4-dimensional, with the first axis being length-6. + + :arg projector: A :class:`Projector`. + + :arg hubble: The current value of the conformal Hubble parameter. + + The remaining arguments are the same as those to :meth:`__call__`. 
+ """ + + queue = queue or hij.queue + + hij_k = cla.empty(queue, (6,)+self.kshape, dtype=self.cdtype) + + for mu in range(6): + self.fft.dft(hij[mu], hij_k[mu]) + + def tensor_id(i, j): + a = i if i <= j else j + b = j if i <= j else i + return (7 - a) * a // 2 - 4 + b + + gw_spec = [] + projector.transverse_traceless(queue, hij_k) + for mu in range(6): + spec = self.bin_power(hij_k[mu], queue=queue, k_power=k_power) + gw_spec.append(spec) + + gw_tot = sum(gw_spec[tensor_id(i, j)] + for i in range(1, 4) for j in range(1, 4)) + + return self.norm / 12 / hubble**2 * gw_tot diff --git a/pystella/multigrid/__init__.py b/pystella/multigrid/__init__.py new file mode 100644 index 0000000..61cdcd4 --- /dev/null +++ b/pystella/multigrid/__init__.py @@ -0,0 +1,493 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + + +import numpy as np +import pyopencl.array as cla +from pystella.multigrid.transfer import (Injection, FullWeighting, + LinearInterpolation, CubicInterpolation) +from pystella.multigrid.relax import JacobiIterator, NewtonIterator + +__doc__ = """ +.. currentmodule:: pystella.multigrid +.. autoclass:: FullApproximationScheme +.. autoclass:: MultiGridSolver + +.. _multigrid-cycles: + +Multigrid cycles +^^^^^^^^^^^^^^^^ + +Multigrid cycles are represnted as a sequence of levels to visit and how many +smoothing iterations to perform on each. +Level ``i`` denotes the level with a factor ``2**i`` fewer gridpoints +in each dimension (relative to the finest grid). +The following utilities can be used to generate particular types ofcycles +by specifying, e.g., the coarsest level to visit and how many iterations +to perform on these levels. + +.. autofunction:: mu_cycle +.. autofunction:: v_cycle +.. autofunction:: w_cycle +.. autofunction:: f_cycle +""" + + +def mu_cycle(mu, i, nu1, nu2, max_depth): + """ + A utility for generating a generic :math:`\\mu`-cycle. + + :arg mu: The order of the cycle. See... + + :arg i: The initial and final (i.e., finest) level to traverse from/to. + + :arg nu1: The number of iterations to perform on each level after a + transition to the next coarser level. + + :arg nu2: The number of iterations to perform on each level after a + transition to the next finer level. + + :arg max_depth: The lowest level to traverse to. + + :returns: A generic multigrid cycle in the form of a :class:`list` of + :class:`tuple`'s ``(level, iterations)``, representing the order of levels + to visit and how many smoothing iterations to perform on each. + """ + + if i == max_depth: + return [(i, nu2)] + else: + x = mu_cycle(mu, i+1, nu1, nu2, max_depth) + return [(i, nu1)] + x + x[1:]*(mu-1) + [(i, nu2)] + + +def v_cycle(nu1, nu2, max_depth): + """ + A utility for generating a V-cycle. 
+ + Example:: + + >>> v_cycle(10, 20, 3) + [(0, 10), (1, 10), (2, 10), (3, 20), (2, 20), (1, 20), (0, 20)] + + :arg nu1: The number of iterations to perform on each level after a + transition to the next coarser level. + + :arg nu2: The number of iterations to perform on each level after a + transition to the next finer level. + + :arg max_depth: The lowest level to traverse to. + + :returns: A V-cycle in the form of a :class:`list` of + :class:`tuple`'s ``(level, iterations)``, representing the order of levels + to visit and how many smoothing iterations to perform on each. + """ + + return mu_cycle(1, 0, nu1, nu2, max_depth) + + +def w_cycle(nu1, nu2, max_depth): + """ + A utility for generating a W-cycle. + + Example:: + + >>> w_cycle(10, 20, 3) + [(0, 10), (1, 10), (2, 10), (3, 20), (2, 20), (3, 20), (2, 20), (1, 20), + (2, 10), (3, 20), (2, 20), (3, 20), (2, 20), (1, 20), (0, 20)] + + :arg nu1: The number of iterations to perform on each level after a + transition to the next coarser level. + + :arg nu2: The number of iterations to perform on each level after a + transition to the next finer level. + + :arg max_depth: The lowest level to traverse to. + + :returns: A W-cycle in the form of a :class:`list` of + :class:`tuple`'s ``(level, iterations)``, representing the order of levels + to visit and how many smoothing iterations to perform on each. + """ + + return mu_cycle(2, 0, nu1, nu2, max_depth) + + +def _cycle(i, j, k, nu1, nu2): + down = [(a, nu1) for a in range(i, j)] + up = [(a, nu2) for a in range(j, k-1, -1)] + return down + up + + +def f_cycle(nu1, nu2, max_depth): + """ + A utility for generating a F-cycle. + + Example:: + + >>> f_cycle(10, 20, 3) + [(0, 10), (1, 10), (2, 10), (3, 20), (2, 20), (3, 20), (2, 20), (1, 20), + (2, 10), (3, 20), (2, 20), (1, 20), (0, 20)] + + :arg nu1: The number of iterations to perform on each level after a + transition to the next coarser level. 
+ + :arg nu2: The number of iterations to perform on each level after a + transition to the next finer level. + + :arg max_depth: The lowest level to traverse to. + + :returns: An F-cycle in the form of a :class:`list` of + :class:`tuple`'s ``(level, iterations)``, representing the order of levels + to visit and how many smoothing iterations to perform on each. + """ + + cycle = _cycle(0, max_depth, max_depth-1, nu1, nu2) + for top in range(max_depth-1, 0, -1): + cycle += _cycle(top+1, max_depth, top-1, nu1, nu2) + return cycle + + +class FullApproximationScheme: + """ + A class for solving generic systems of boundary-value problems using the + Full Approximation Scheme. + + .. automethod:: __init__ + .. automethod:: __call__ + + The below methods are documented for development's sake, but are not + intended to be called by the user. + + .. automethod:: coarse_array_like + .. automethod:: transfer_down + .. automethod:: transfer_up + .. automethod:: smooth + .. automethod:: coarse_level_like + .. automethod:: setup + """ + + def __init__(self, solver, h, **kwargs): + """ + :arg solver: A instance of a subclass of :class:`relax.RelaxationBase` + (e.g., :class:`JacobiIterator` or :class:`NewtonIterator`). + + :arg h: The number of halo padding layers on each face of the numerical grid. + + The following keyword-only arguments are recognized: + + :arg Restrictor: A mapper which restricts arrays from a fine + to a coarser level. + Defaults to :class:`FullWeighting`. + + :arg Interpolator: A mapper which interpolates arrays from a coarse + to a finer level. + Defaults to :class:`LinearInterpolation`. 
+ """ + + self.solver = solver + self.h = h + + Restrictor = kwargs.pop('Restrictor', FullWeighting) + self.restrict = Restrictor(h=h) + self.restrict_and_correct = Restrictor(h=h, correct=True) + + Interpolator = kwargs.pop('Interpolator', LinearInterpolation) + self.interpolate = Interpolator(h=h) + self.interpolate_and_correct = Interpolator(h=h, correct=True) + + self.unknowns = {} + self.rhos = {} + self.auxiliaries = {} + self.tmp = {} + self.resid = {} + self.dx = {} + self.decomp = {} + self.smooth_args = {} + self.resid_args = {} + + def coarse_array_like(self, f1h): + """ + :arg f1h: A :class:`pyopencl.array.Array`. + Its unpadded shape will be inferred by subtracting ``2 * self.h`` + from each axis of its shape. + + :returns: A :class:`pyopencl.array.Array` with padded shape for a + grid with half as many points in each dimension of ``f1h``. + """ + + def halve_and_pad(i): + return (i - 2 * self.h)//2 + 2 * self.h + + coarse_shape = tuple(map(halve_and_pad, f1h.shape)) + f2h = cla.zeros(f1h.queue, shape=coarse_shape, dtype=f1h.dtype) + return f2h + + def transfer_down(self, queue, i): + """ + Transfers all arrays from a fine to the next-coarser level. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg i: The level from to transfer to. + """ + + for k, f1 in self.unknowns[i-1].items(): + f2 = self.unknowns[i][k] + self.restrict(queue, f1=f1, f2=f2) + self.decomp[i].share_halos(queue, f2) + + self.solver.residual(queue, **self.resid_args[i-1]) + + for k, r1 in self.resid[i-1].items(): + r2 = self.resid[i][k] + self.decomp[i-1].share_halos(queue, r1) + self.restrict(queue, f1=r1, f2=r2) + + self.solver.lhs_correction(queue, **self.resid_args[i]) + for k, rho in self.rhos[i].items(): + self.decomp[i].share_halos(queue, rho) + + def transfer_up(self, queue, i): + """ + Transfers all arrays from a coarse to the next-finer level. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg i: The level from to transfer to. 
+ """ + + for k, f1 in self.unknowns[i].items(): + f2 = self.unknowns[i+1][k] + self.restrict_and_correct(queue, f1=f1, f2=f2) + self.decomp[i+1].share_halos(queue, f2) + self.interpolate_and_correct(queue, f1=f1, f2=f2) + self.decomp[i].share_halos(queue, f1) + + def smooth(self, queue, i, nu): + """ + Invokes the relaxation solver, computing the error before and after. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg i: On which level to perform the smoothing. + + :arg nu: The number of smoothing iterations to perform. + + :returns: A list containing the errors before and after of the form + ``[(i, error_before), (i, error_after)]``. + """ + + errs1 = self.solver.get_error(queue, **self.resid_args[i]) + self.solver(self.decomp[i], queue, iterations=nu, **self.smooth_args[i]) + errs2 = self.solver.get_error(queue, **self.resid_args[i]) + return [(i, errs1), (i, errs2)] + + def coarse_level_like(self, dict_1): + """ + A wrapper to :meth:`coarse_array_like` with returns a :class:`dict` + like ``dict_1`` whose values are new :class:`pyopencl.array.Array`'s + with shape appropriate for the next-coarser level. + """ + + dict_2 = {} + for k, f1 in dict_1.items(): + dict_2[k] = self.coarse_array_like(f1) + return dict_2 + + def setup(self, decomp0, queue, dx0, depth, **kwargs): + """ + Performs the inital setup and array allocation for each required level. + Creates instances of :class:`~pystella.DomainDecomposition` for each level + and all arrays needed on each level. + Called automatically by :meth:`__call__`. + + :arg decomp0: An instance of :class:`~pystella.DomainDecomposition` + constructed for the finest level. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg dx0: The grid-spacing on the finest level. + + :arg depth: The coarsest level to traverse to. That is, the deepest level + which will be used has a factor ``2**depth`` fewer gridpoints than the + finest level. + + All unknowns and ``rho`` arrays must be passed by keyword. 
Any additional keyword arguments are interpreted as auxiliary arrays
+ + :arg decomp0: An instance of :class:`~pystella.DomainDecomposition` + constructed for the finest level. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + :arg dx0: The grid-spacing on the finest level. + + :arg cycle: The multigrid cycle to execute. + See :ref:`multigrid-cycles` for details on how these are specified + and for utilities to generate them. + + All required arrays must be passed by keyword. + """ + + if cycle is None: + grid_shape = tuple(ni * pi + for ni, pi in zip(decomp0.rank_shape, + decomp0.proc_shape)) + depth = int(np.log2(min(grid_shape) / 8)) + cycle = v_cycle(25, 50, depth) + + depth = max([i for i, nu in cycle]) + self.setup(decomp0, queue, dx0, depth, **kwargs) + + nu0 = cycle[0][1] + level_errors = self.smooth(queue, 0, nu0) + + previous = 0 + for i, nu in cycle[1:]: + if i == previous + 1: + self.transfer_down(queue, i) + elif i == previous - 1: + self.transfer_up(queue, i) + else: + raise ValueError('consecutive levels must be spaced by one') + level_errors += self.smooth(queue, i, nu) + previous = i + + return level_errors + + +class MultiGridSolver(FullApproximationScheme): + """ + A class for solving systems of linear boundary-value problems using linear + Multigrid. + Usage is identical to :class:`FullApproximationScheme`. + + .. warning:: + + Convergence is currently slower than expected, suggesting a possible + problem with the lower levels. + :class:`FullApproximationScheme` is perfectly suited to solve linear problems + as well. + + The scheme is implemented by subclassing :class:`FullApproximationScheme`, with + the only differences in the level transfer functionality (which are not intended + to be called by the user). + + .. automethod transfer_down + .. automethod transfer_up + """ + + # FIXME: convergence slow, possible issue with coarse levels? 
+ def transfer_down(self, queue, i): + self.solver.residual(queue, **self.resid_args[i-1]) + + for f, rho in self.solver.f_to_rho_dict.items(): + r1 = self.resid[i-1]['r_'+f] + self.decomp[i-1].share_halos(queue, r1) + r2 = self.rhos[i][rho] + self.restrict(queue, f1=r1, f2=r2) + self.decomp[i].share_halos(queue, r2) + + def transfer_up(self, queue, i): + for k, f1 in self.unknowns[i].items(): + f2 = self.unknowns[i+1][k] + self.interpolate_and_correct(queue, f1=f1, f2=f2) + self.decomp[i].share_halos(queue, f1) + + +__all__ = [ + 'Injection', + 'FullWeighting', + 'LinearInterpolation', + 'CubicInterpolation', + 'JacobiIterator', + 'NewtonIterator', + 'FullApproximationScheme', + 'MultiGridSolver', + 'v_cycle', + 'w_cycle', + 'f_cycle', +] diff --git a/pystella/multigrid/relax.py b/pystella/multigrid/relax.py new file mode 100644 index 0000000..e44739e --- /dev/null +++ b/pystella/multigrid/relax.py @@ -0,0 +1,376 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import loopy as lp +from pystella import Field, get_field_args, Stencil + +__doc__ = """ +.. currentmodule:: pystella.multigrid +.. autoclass:: pystella.multigrid.relax.RelaxationBase +.. autoclass:: JacobiIterator +.. autoclass:: NewtonIterator +""" + + +class RelaxationBase: + """ + Base class for relaxation-based iterative solvers to solve + boundary-value problems of the form + + .. math:: + + L(f) = \\rho. + + Here :math:`\\rho` is not a function of :math:`f`, but :math:`L(f)` + may in principle be an arbitrary (nonlinear differential) function + of :math:`f` (assuming a subclass's implemented solver is appropriate + for such an equation). + + .. automethod:: __init__ + .. automethod:: __call__ + .. automethod:: get_error + + A subclass implements a particular iterative solver by providing + a :meth:`step_operator` method. + + .. automethod:: step_operator + + The below methods are documented for development's sake, but are not + intended to be called by the user: + + .. automethod:: make_stepper + .. automethod:: make_lhs_kernel + .. automethod:: make_residual_kernel + .. automethod:: make_resid_stats + + The following methods related to solving additional constraints on + systems with periodic boundary conditions are incomplete: + + .. automethod:: make_shift_kernel + .. automethod:: eval_constraint + .. automethod:: solve_constraint + """ + + def __init__(self, decomp, queue, lhs_dict, MapKernel=Stencil, **kwargs): + """ + :arg decomp: A :class:`~pystella.DomainDecomposition`. + + :arg queue: A :class:`pyopencl.CommandQueue`. 
+ + :arg lhs_dict: A :class:`dict` representing the set of equations to be + solved, whose keys must be :class:`~pystella.Field`'s representing the + unknown degrees of freedom and values are :class:`tuple`'s + ``(lhs, rho)`` representing the left-hand side :math:`L(f)` + and right-hand side :math:`\\rho` of that unknown's equation. + + The following keyword arguments are recognized: + + :arg MapKernel: The kernel class which the required mapping kernels will + be instances of---i.e., one of :class:`~pystella.ElementWiseMap` or its + subclasses. Defaults to :class:`~pystella.Stencil`. + + :arg unknown_args: A list of :class:`loopy.ArrayArg`'s representing + the unknown degrees of freedom. + Defaults to *None*, in which case the correct arguments + (in particular, their shapes) are (attempted to be) inferred + from the keys of ``lhs_dict``. + + :arg rho_args: A list of :class:`loopy.ArrayArg`'s representing + the static right-hand side arrays (i.e., those independent + of the degrees of freedom). + Defaults to *None*, in which case the correct arguments + (in particular, their shapes) are (attempted to be) inferred + from the values of ``lhs_dict``. + + Any remaining keyword arguments are passed to each of the kernel + creation routines. 
+ """ + + self.decomp = decomp + self.lhs_dict = lhs_dict + self.h = kwargs.get('h') + + # get GlobalArgs of unknowns, or infer from lhs_dict.keys() + self.unknown_args = kwargs.pop('unknown_args', None) + if self.unknown_args is None: + self.unknown_args = get_field_args(list(lhs_dict.keys())) + + def array_args_like(args, prefix='', suffix=''): + return [lp.GlobalArg(prefix+arg.name+suffix, + shape=arg.shape, dtype=arg.dtype) + for arg in args] + + self.temp_args = array_args_like(self.unknown_args, prefix='tmp_') + self.residual_args = array_args_like(self.unknown_args, prefix='r_') + + # get GlobalArgs of unknowns, or infer from lhs_dict.keys() + self.rho_args = kwargs.pop('rho_args', None) + if self.rho_args is None: + rho_list = [lhs[1] for lhs in lhs_dict.values()] + self.rho_args = get_field_args(rho_list) + + self.f_to_rho_dict = {} + for f, (lhs, rho) in self.lhs_dict.items(): + self.f_to_rho_dict[f.child.name] = rho.child.name + + self.make_stepper(MapKernel, **kwargs) + self.make_lhs_kernel(MapKernel, **kwargs) + self.make_residual_kernel(MapKernel, **kwargs) + self.make_resid_stats(decomp, queue, **kwargs) + self.make_shift_kernel(**kwargs) + + def step_operator(self, f, lhs, rho): + """ + :arg f: The unknown field for which a relaxation step instruction + will be generated. + + :arg lhs: :math:`L(f)` for the unknown ``f``'s equation. + + :arg rho: :math:`\\rho` for the unknown ``f``'s equation. 
+ """ + + raise NotImplementedError + + def make_stepper(self, MapKernel, **kwargs): + self.step_dict = {} + for f, (lhs, rho) in self.lhs_dict.items(): + tmp = Field('tmp_'+f.child.name, offset=f.offset) + self.step_dict[tmp] = self.step_operator(f, lhs, rho) + + args = self.unknown_args + self.rho_args + self.temp_args + self.stepper = MapKernel(self.step_dict, args=args, **kwargs) + + def step(self, queue, **kwargs): + self.stepper(queue, **kwargs) + + def __call__(self, decomp, queue, iterations=100, **kwargs): + """ + Executes a number of iterations of relaxation. + + :arg decomp: A :class:`~pystella.DomainDecomposition`. + + .. note:: + + ``decomp`` is intended to (and should) be different from the + :attr:`decomp` passed to :meth:`__init__`, as each multigrid level + requires a different :class:`~pystella.DomainDecomposition`. + + :arg queue: A :class:`pyopencl.CommandQueue`. + + The following keyword arguments are recognized: + + :arg iterations: The number of iterations to execute. + Defaults to ``100``. + + :arg solve_constraint: + Defaults to *False*. + + All arrays required for the relaxation step must be passed by keyword. 
+ """ + + solve_constraint = kwargs.pop('solve_constraint', False) + + even_iterations = iterations if iterations % 2 == 0 else iterations + 1 + for i in range(even_iterations): + self.stepper(queue, **kwargs) + for arg in self.unknown_args: + f = arg.name + kwargs[f], kwargs['tmp_'+f] = kwargs['tmp_'+f], kwargs[f] + decomp.share_halos(queue, kwargs[f]) + + if solve_constraint: + self.solve_constraint(queue, **kwargs) + + def make_lhs_kernel(self, MapKernel, **kwargs): + tmp_dict = {} + lhs_dict = {} + from pymbolic import var + tmp_lhs = var('tmp_lhs') + for i, (f, (lhs, rho)) in enumerate(self.lhs_dict.items()): + tmp_dict[tmp_lhs[i]] = lhs + resid = Field('r_'+f.child.name, offset='h') + lhs_dict[rho] = resid + tmp_lhs[i] + + args = self.unknown_args + self.rho_args + self.residual_args + self.lhs_correction = MapKernel(lhs_dict, tmp_dict=tmp_dict, args=args, + **kwargs) + + def make_residual_kernel(self, MapKernel, **kwargs): + residual_dict = {} + for f, (lhs, rho) in self.lhs_dict.items(): + resid = Field('r_'+f.child.name, offset='h') + residual_dict[resid] = rho - lhs + + args = self.unknown_args + self.rho_args + self.residual_args + self.residual = MapKernel(residual_dict, args=args, **kwargs) + + def make_resid_stats(self, decomp, queue, dtype, **kwargs): + reducers = {} + avg_reducers = {} + # from pymbolic.functions import fabs + from pymbolic import var + fabs = var('fabs') + for arg in self.unknown_args: + f = arg.name + resid = Field('r_'+f, offset='h') + reducers[f] = [(fabs(resid), 'max'), (resid**2, 'sum')] + avg_reducers[f] = [(resid, 'sum')] + + args = self.residual_args + from pystella import Reduction + self.resid_stats = Reduction(decomp, reducers, args=args, **kwargs) + self.avg_resid = Reduction(decomp, avg_reducers, args=args, **kwargs) + + def get_error(self, queue, **kwargs): + """ + Computes statistics of the current residual, :math:`L(f) - \\rho`. + + :arg queue: A :class:`pyopencl.CommandQueue`. 
+ + All required arrays must be passed by keyword. + + :returns: A :class:`dict` whose values are :class:`list`'s of the + :math:`L_\\infty` (maximum absolute) and :math:`L_2` (Euclidean) + norms of the residual equation corresponding to the unknown denoted + by the keys of the dictionary. + """ + + self.residual(queue, **kwargs, filter_args=True) + + padded_shape = kwargs.get(self.unknown_args[0].name).shape + rank_shape = tuple(i - 2 * self.h for i in padded_shape) + grid_size = np.product(self.decomp.proc_shape) * np.product(rank_shape) + errs = self.resid_stats(queue, **kwargs, filter_args=True, + rank_shape=rank_shape, grid_size=grid_size) + for k, v in errs.items(): + errs[k][1] = v[1]**.5 + + return errs + + def make_shift_kernel(self, **kwargs): + f = Field('f', offset=0) + tmp = Field('tmp', offset=0) + from pymbolic import var + shift = var('shift') + scale = var('scale') + self.shift_dict = {tmp: scale * f + shift} + + args = ['...'] + from pystella import ElementWiseMap + self.shifter = ElementWiseMap(self.shift_dict, args=args, **kwargs) + + def eval_constraint(self, queue, shifts, scales, **kwargs): + for arg, shift, scale in zip(self.unknown_args, shifts, scales): + f = arg.name + self.shifter(queue, f=kwargs[f], tmp=kwargs['tmp_'+f], + shift=np.array(shift), scale=np.array(scale)) + + padded_shape = kwargs.get(self.unknown_args[0].name).shape + rank_shape = tuple(i - 2 * self.h for i in padded_shape) + grid_size = np.product(self.decomp.proc_shape) * np.product(rank_shape) + + args_to_avg_resid = kwargs.copy() + for arg in self.unknown_args: + f = arg.name + args_to_avg_resid[f] = kwargs['tmp_'+f] + + result = self.avg_resid(queue, **args_to_avg_resid, filter_args=True, + rank_shape=rank_shape, grid_size=grid_size) + return result['avg'] + + def solve_constraint(self, queue, **kwargs): + raise NotImplementedError('constraint solving untested') + + def integral_condition(shifts): + scales = np.ones_like(shifts) + avg = self.eval_constraint(queue, 
**kwargs, shifts=shifts, scales=scales) + return np.sum(avg) + + from scipy.optimize import root_scalar + x0 = np.zeros(len(self.unknown_args)) + x1 = x0 + 1.e-3 + x0 += - 1.e-3 + sol = root_scalar(integral_condition, x0=x0, x1=x1, method='secant') + if not sol.converged: + print(sol) + else: + shifts = sol.root + scales = np.ones_like(shifts) + for arg, shift, scale in zip(self.unknown_args, shifts, scales): + f = arg.name + self.shifter(queue, f=kwargs[f], tmp=kwargs[f], + shift=np.array(shift), scale=np.array(scale)) + + +class JacobiIterator(RelaxationBase): + """ + A subclass of :class:`RelaxationBase` which implements (damped) Jacobi iteration + for linear systems of the form :math:`L f = \\rho`, where :math:`L` is a linear + operator. + A step of Jacobi iteration takes the form + + .. math:: + + f \\leftarrow (1 - \\omega) f + + \\omega D^{-1} \\left( \\rho - (L - D) f \\right) + + where :math:`D` is the diagonal part of :math:`L`. + In practice :math:`D` is computed by differentiating :math:`L f` with respect to + :math:`f`, which is inappropriate for nonlinear system (which Jacobi + iteration is not intended for). + """ + + def step_operator(self, f, lhs, rho): + from pystella import diff + D = diff(lhs, f) + R_y = lhs - D * f # FIXME: only valid for linear equations + + from pymbolic import var + omega = var('omega') + + return (1 - omega) * f + omega * (rho - R_y) / D + + +class NewtonIterator(RelaxationBase): + """ + A subclass of :class:`RelaxationBase` which implements Newton iteration + for arbitrary systems of the form :math:`L(f) = \\rho`, where :math:`L` + is a generic function of :math:`f`. + A step of Newton iteration takes the form + + .. 
math:: + + f \\leftarrow f + - \\omega \\frac{L(f) - \\rho}{\\partial L(f) / \\partial f} + + """ + + def step_operator(self, f, lhs, rho): + from pystella import diff + D = diff(lhs, f) + + from pymbolic import var + omega = var('omega') + + return f - omega * (lhs - rho) / D diff --git a/pystella/multigrid/transfer.py b/pystella/multigrid/transfer.py new file mode 100644 index 0000000..7e495d0 --- /dev/null +++ b/pystella/multigrid/transfer.py @@ -0,0 +1,265 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import loopy as lp +from pystella import Field +from pystella import Stencil, ElementWiseMap +from pystella.derivs import expand_stencil + +__doc__ = """ +.. currentmodule:: pystella.multigrid +.. autofunction:: pystella.multigrid.transfer.RestrictionBase +.. autofunction:: FullWeighting +.. autofunction:: Injection +.. autofunction:: pystella.multigrid.transfer.InterpolationBase +.. 
autofunction:: LinearInterpolation +.. autofunction:: CubicInterpolation +""" + + +def RestrictionBase(coefs, StencilKernel, h, **kwargs): + """ + A base function for generating a restriction kernel. + + :arg coefs: The coefficients representing the restriction formula. + Follows the convention of :func:`pystella.derivs.centered_diff` + (since the restriction is applied recursively in each dimension). + + :arg StencilKernel: The stencil mapper to create an instance of. + Defaults to :class:`~pystella.Stencil`. + + :arg h: The number of halo padding layers on each face of the numerical grid. + + :arg lsize: The shape of prefetched arrays in shared memory. + See :class:`~pystella.ElementWiseMap`. + Defaults to ``(4, 4, 4)``. + + :arg correct: A :class:`bool` determining whether to produce a kernel which + corrects an output array by the restricted array, or to only perform + strict restriction. + Defaults to *False*. + + :returns: An instance of ``StencilKernel`` which executes the requested + restriction. 
+ """ + + lsize = kwargs.pop('lsize', (4, 4, 4)) + + # ensure grid dimensions are *not* passed, as they will be misinterpreted + for N in ['Nx', 'Ny', 'Nz']: + _ = kwargs.pop(N, None) + + restrict_coefs = {} + for a, c_a in coefs.items(): + for b, c_b in coefs.items(): + for c, c_c in coefs.items(): + restrict_coefs[(a, b, c)] = c_a * c_b * c_c + + from pymbolic import parse, var + i, j, k = parse('i, j, k') + f1 = Field('f1', offset='h', indices=(2*i, 2*j, 2*k)) + f2 = Field('f2', offset='h') + tmp = var('tmp') + + tmp_dict = {tmp: expand_stencil(f1, restrict_coefs)} + + if kwargs.pop('correct', False): + restrict_dict = {f2: f2 - tmp} + else: + restrict_dict = {f2: tmp} + + args = [lp.GlobalArg('f1', shape='(2*Nx+2*h, 2*Ny+2*h, 2*Nz+2*h)'), + lp.GlobalArg('f2', shape='(Nx+2*h, Ny+2*h, Nz+2*h)')] + + if isinstance(StencilKernel, Stencil): + return StencilKernel(restrict_dict, tmp_dict=tmp_dict, args=args, + prefetch_args=['f1'], h=h, lsize=lsize, + **kwargs) + else: + return StencilKernel(restrict_dict, tmp_dict=tmp_dict, args=args, + h=h, lsize=lsize, **kwargs) + + +def FullWeighting(StencilKernel=Stencil, **kwargs): + """ + Creates a full-weighting restriction kernel, which restricts in input array + :math:`f^{(h)}` on the fine grid into an array :math:`f^{(2 h)}` on the + coarse grid by applying + + .. math:: + + f^{(2 h)}_i + = \\frac{1}{4} f^{(h)}_{2 i - 1} + + \\frac{1}{2} f^{(h)}_{2 i} + + \\frac{1}{4} f^{(h)}_{2 i + 1} + + in each dimension. + + See :class:`transfer.RestrictionBase`. + """ + + from pymbolic.primitives import Quotient + coefs = {-1: Quotient(1, 4), 0: Quotient(1, 2), 1: Quotient(1, 4)} + return RestrictionBase(coefs, StencilKernel, **kwargs) + + +def Injection(StencilKernel=ElementWiseMap, **kwargs): + """ + Creates an injection kernel, which restricts in input array + :math:`f^{(h)}` on the fine grid into an array :math:`f^{(2 h)}` on the + coarse grid by direct injection: + + .. 
math:: + + f^{(2 h)}_{i, j ,k} + = f^{(h)}_{2 i, 2 j, 2 k} + + See :class:`transfer.RestrictionBase`. + """ + + coefs = {0: 1} + return RestrictionBase(coefs, StencilKernel, **kwargs) + + +def InterpolationBase(even_coefs, odd_coefs, StencilKernel, h, **kwargs): + """ + A base function for generating a restriction kernel. + + :arg even_coefs: The coefficients representing the interpolation formula + for gridpoints on the coarse and fine grid which coincide in space. + Follows the convention of :func:`pystella.derivs.centered_diff` + (since the restriction is applied recursively in each dimension). + + :arg odd_coefs: Same as ``even_coefs``, but for points on the fine grid which + lie between points on the coarse grid. + + :arg StencilKernel: The stencil mapper to create an instance of. + Defaults to :class:`~pystella.Stencil`. + + :arg h: The number of halo padding layers on each face of the numerical grid. + + :arg correct: A :class:`bool` determining whether to produce a kernel which + corrects an output array by the interpolated array, or to only perform + strict interpolation. + Defaults to *False*. + + :returns: An instance of ``StencilKernel`` which executes the requested + interpolation. 
+ """ + + from pymbolic import parse, var + i, j, k = parse('i, j, k') + f1 = Field('f1', offset='h') + + tmp_dict = {} + tmp = var('tmp') + + import itertools + for parity in tuple(itertools.product((0, 1), (0, 1), (0, 1))): + result = 0 + for a, c_a in odd_coefs.items() if parity[0] else even_coefs.items(): + for b, c_b in odd_coefs.items() if parity[1] else even_coefs.items(): + for c, c_c in odd_coefs.items() if parity[2] else even_coefs.items(): + f2 = Field('f2', offset='h', + indices=((i+a)//2, (j+b)//2, (k+c)//2)) + result += c_a * c_b * c_c * f2 + + tmp_dict[tmp[parity]] = result + + def is_odd(expr): + from pymbolic.primitives import If, Comparison, Remainder + return If(Comparison(Remainder(expr, 2), '==', 1), 1, 0) + + a, b, c = parse('a, b, c') + for ind, val in zip((i, j, k), (a, b, c)): + tmp_dict[val] = is_odd(ind) + + if kwargs.pop('correct', False): + interp_dict = {f1: f1 + tmp[a, b, c]} + else: + interp_dict = {f1: tmp[a, b, c]} + + args = [lp.GlobalArg('f1', shape='(Nx+2*h, Ny+2*h, Nz+2*h)'), + lp.GlobalArg('f2', shape='(Nx//2+2*h, Ny//2+2*h, Nz//2+2*h)')] + + return StencilKernel(interp_dict, tmp_dict=tmp_dict, args=args, + prefetch_args=['f2'], h=h, **kwargs) + + +def LinearInterpolation(StencilKernel=Stencil, **kwargs): + """ + Creates an linear interpolation kernel, which interpolates in input array + :math:`f^{(h)}` on the fine grid into an array :math:`f^{(2 h)}` on the + coarse grid via + + .. math:: + + f^{(h)}_{2 i} + &= f^{(2 h)}_{i} + + f^{(h)}_{2 i + 1} + &= \\frac{1}{2} f^{(2 h)}_{i} + \\frac{1}{2} f^{(2 h)}_{i + 1} + + in each dimension. + + See :class:`transfer.InterpolationBase`. 
+ """ + + from pymbolic.primitives import Quotient + odd_coefs = {-1: Quotient(1, 2), 1: Quotient(1, 2)} + even_coefs = {0: 1} + + return InterpolationBase(even_coefs, odd_coefs, StencilKernel, **kwargs) + + +def CubicInterpolation(StencilKernel=Stencil, **kwargs): + """ + Creates an cubic interpolation kernel, which interpolates in input array + :math:`f^{(h)}` on the fine grid into an array :math:`f^{(2 h)}` on the + coarse grid via + + .. math:: + + f^{(h)}_{2 i} + &= f^{(2 h)}_{i} + + f^{(h)}_{2 i + 1} + &= - \\frac{1}{16} f^{(2 h)}_{i - 1} + + \\frac{9}{16} f^{(2 h)}_{i} + + \\frac{9}{16} f^{(2 h)}_{i + 1} + - \\frac{1}{16} f^{(2 h)}_{i + 2} + + in each dimension. + + See :class:`transfer.InterpolationBase`. + """ + + if kwargs.get('h', 0) < 2: + raise ValueError('CubicInterpolation requires padding >= 2') + + from pymbolic.primitives import Quotient + odd_coefs = {-3: Quotient(-1, 16), -1: Quotient(9, 16), + 1: Quotient(9, 16), 3: Quotient(-1, 16)} + even_coefs = {0: 1} + + return InterpolationBase(even_coefs, odd_coefs, StencilKernel, **kwargs) diff --git a/pystella/output.py b/pystella/output.py new file mode 100644 index 0000000..fe1e5d8 --- /dev/null +++ b/pystella/output.py @@ -0,0 +1,177 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import h5py + + +def get_versions(dependencies): + import importlib + import pkg_resources + from pytools import find_module_git_revision + versions = {} + git_revs = {} + for dep in dependencies: + try: + versions[dep] = pkg_resources.get_distribution(dep).version + except ModuleNotFoundError: + versions[dep] = None + try: + file = importlib.import_module(dep.replace('.', '')).__file__ + git_revs[dep] = find_module_git_revision(file, n_levels_up=1) + except ModuleNotFoundError: + git_revs[dep] = None + return versions, git_revs + + +def append(dset, data): + dset.resize(dset.shape[0]+1, axis=0) + dset[-1] = data + + +class OutputFile(h5py.File): + """ + A wrapper to :class:`h5py:File` which collects and saves useful run + information and provides functionality to append to datasets. + + .. automethod:: __init__ + .. automethod:: output + """ + + def create_from_kwargs(self, group, **kwargs): + self.create_group(group) + for key, val in kwargs.items(): + if not isinstance(val, np.ndarray): + val = np.array(val) + shape = (0,) + val.shape + maxshape = (None,) + val.shape + self[group].create_dataset(key, shape=shape, dtype=val.dtype, + maxshape=maxshape, chunks=True) + + def __init__(self, context=None, name=None, runfile=None, **kwargs): + """ + No arguments are required, but the following keyword arguments are + recognized: + + :arg context: A :class:`pyopencl.Context`. 
If not *None*, information + about the device, driver, and platform is saved to the + :attr:`attrs` dictionary. + Defaults to *None*. + + :arg name: The name of the ``.h5`` (sans the extension) file to create. + If *None*, a unique filename is chosen based on the current date and + time. + Defaults to *None*. + + :arg runfile: A file whose content will be saved as a string to + ``attrs['runfile']``, if not *None*. Useful for attaching the run file + of a simulation to its output. + Defaults to *None*. + + Any remaining keyword arguments are saved to the :attr:`attrs` dictionary. + If any value ``val`` is not of valid type to be saved, the ``val.__name__`` + attribute is saved if the value is a :class:`type` instance, or else + ``str(val)`` is saved. + + Versions and git revisions (when available) of :mod:`pystella` and its + dependencies are saved as ``'versions'`` and ``'git_revs'`` + :class:`h5py:Dataset`'s. The hostname is recorded in the ``'hostname'`` + key of the :attr:`attrs` dictionary. 
+ """ + + if name is None: + import datetime + name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + + while True: + try: + filename = name + '.h5' + super().__init__(filename, 'x') + break + except OSError: + import time + time.sleep(1) + name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + + if context is not None: + device, = context.devices + self.attrs['device'] = device.name + self.attrs['driver_version'] = device.driver_version + self.attrs['platform_version'] = device.platform.version + + import socket + self.attrs['hostname'] = socket.gethostname() + + for key, val in kwargs.items(): + try: + self.attrs[key] = val + except: # noqa + if isinstance(val, type): + self.attrs[key] = val.__name__ + else: + self.attrs[key] = str(val) + + if runfile is not None: + fp = open(runfile, "r") + content = fp.read() + fp.close() + self.attrs['runfile'] = content + + # output current dependency versions + dependencies = ['pystella', 'numpy', 'scipy', + 'pyopencl', 'loo.py', 'pymbolic', + 'mpi4py', 'gpyfft', 'mpi4py_fft', 'h5py'] + versions, git_revs = get_versions(dependencies) + + self.create_group('versions') + for k, v in versions.items(): + self['versions'][k] = v + + self.create_group('git_revs') + for k, v in git_revs.items(): + self['git_revs'][k] = '' if v is None else v + + def output(self, group, **kwargs): + """ + Appends values to datasets within a :class:`h5py:Group` named ``group``. + ``group`` is created if it does not exist, and the :class:`h5py:Dataset`'s + of this :class:`h5py:Group` are determined by the keys of keyword arguments. + If ``group`` already exists, iterates over each :class:`h5py:Dataset` and + appends values from keyword arguments (matching :class:`h5py:Dataset` + names to keys). + + :arg group: The :class:`h5py:Group` to append :class:`h5py:Dataset` + values to. + + If ``group`` already exists, a keyword argument for each + :class:`h5py:Dataset` in ``group`` must be provided. 
+ """ + + # create group and datasets if they don't exist + if group not in self: + self.create_from_kwargs(group, **kwargs) + + # ensure that all fields are provided + for key in self[group]: + val = kwargs.pop(key) + append(self[group][key], val) diff --git a/pystella/reduction.py b/pystella/reduction.py new file mode 100644 index 0000000..dbb2d45 --- /dev/null +++ b/pystella/reduction.py @@ -0,0 +1,356 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import numpy as np +import pyopencl.array as cla +import loopy as lp + +from warnings import filterwarnings +from loopy.diagnostic import ParameterFinderWarning +filterwarnings('ignore', category=ParameterFinderWarning) + +__doc__ = """ +.. currentmodule:: pystella +.. 
def get_mpi_reduction_op(op):
    """Map the reduction name *op* to the corresponding :mod:`mpi4py` operator.

    :arg op: One of ``"sum"``, ``"product"``, ``"max"``, or ``"min"``.

    :raises NotImplementedError: if *op* is not one of the supported names.
    """

    # imported lazily so the module remains importable without mpi4py
    from mpi4py import MPI
    _MPI_REDUCTION_OPS = {
        "sum": MPI.SUM,
        "product": MPI.PROD,
        "max": MPI.MAX,
        "min": MPI.MIN,
    }

    try:
        return _MPI_REDUCTION_OPS[op]
    except KeyError:
        raise NotImplementedError('MPI allreduce for operation %s' % op)


def get_numpy_reduction_op(op):
    """Map the reduction name *op* to the corresponding :mod:`numpy` function.

    :arg op: One of ``"sum"``, ``"product"``, ``"max"``, or ``"min"``.

    :raises NotImplementedError: if *op* is not one of the supported names.
    """

    _NUMPY_REDUCTION_OPS = {
        "sum": np.sum,
        "product": np.prod,
        "max": np.max,
        "min": np.min,
    }

    try:
        return _NUMPY_REDUCTION_OPS[op]
    except KeyError:
        raise NotImplementedError('numpy reduction for operation %s' % op)


def red_stmnt(assignee, expr, op):
    """Return a :class:`loopy.Assignment` assigning to *assignee* the
    reduction of *expr* over the ``i`` iname.

    :arg assignee: The left-hand side of the generated assignment.
    :arg expr: The expression to reduce; passed through
        :func:`pystella.Indexer` before being wrapped in the reduction.
    :arg op: The name of the :mod:`loopy` reduction operation (e.g., ``"sum"``).
    """

    from pystella import Indexer
    # note: ('i',) -- the original ('i') is just the string 'i', not a
    # one-tuple of inames (loopy tolerates a bare string, but the tuple is
    # what was clearly intended)
    red = lp.symbolic.Reduction(operation=op,
                                inames=('i',),
                                expr=Indexer(expr),
                                allow_simultaneous=True)
    return lp.Assignment(assignee, red)
import numpy as np
import loopy as lp
from pystella.field import Field, Indexer
from pystella.elementwise import ElementWiseMap
from pymbolic import var

__doc__ = """
.. currentmodule:: pystella.step
.. autoclass:: Stepper

Classical Runge-Kutta methods
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. autoclass:: RungeKuttaStepper
.. currentmodule:: pystella
.. autoclass:: RungeKutta4
.. autoclass:: RungeKutta3SSP
.. autoclass:: RungeKutta3Heun
.. autoclass:: RungeKutta3Nystrom
.. autoclass:: RungeKutta3Ralston
.. autoclass:: RungeKutta2Midpoint
.. autoclass:: RungeKutta2Ralston

Low-storage Runge-Kutta methods
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. currentmodule:: pystella.step
.. autoclass:: LowStorageRKStepper
.. currentmodule:: pystella
.. autoclass:: LowStorageRK54
.. autoclass:: LowStorageRK3Williamson
.. autoclass:: LowStorageRK3Inhomogeneous
.. autoclass:: LowStorageRK3SSP
"""


class Stepper:
    """
    The base class for time steppers, with no implementation of a particular time
    stepper. Currently, only explicit timesteppers are supported.

    .. automethod:: __init__
    .. automethod:: __call__

    .. attribute:: num_stages

        The number of substeps/stages per timestep.

    .. attribute:: expected_order

        The expected convergence order of *global* error, i.e.
        :math:`n` such that the global error is :math:`\\mathcal{O}(\\Delta t^n)`.
    """

    num_stages = None
    expected_order = None

    def make_steps(self, MapKernel=ElementWiseMap, **kwargs):
        # subclasses generate the list of per-stage kernels here
        raise NotImplementedError

    def __init__(self, input, MapKernel=ElementWiseMap, **kwargs):
        """
        :arg input: May be one of the following:

            * a :class:`dict` whose values represent the right-hand side
              of the ODEs to solve, i.e., `(key, value)` pairs corresponding to
              :math:`(y, f)` such that

              .. math::

                  \\frac{\\mathrm{d} y}{\\mathrm{d} t} = f,

              where :math:`f` is an arbitrary function of kernel data.
              Both keys and values must be :mod:`pymbolic` expressions.

            * a :class:`~pystella.Sector`. In this case, the right-hand side
              dictionary will be obtained from :attr:`~pystella.Sector.rhs_dict`,
              and :attr:`args` from :meth:`~pystella.Sector.get_args`.

            * a :class:`list` of :class:`~pystella.Sector`'s. In this case, the
              input obtained from each :class:`~pystella.Sector` (as described
              above) will be combined.

        The following keyword arguments are recognized:

        :arg MapKernel: The kernel class which each substep/stage will be an
            instance of---i.e., one of :class:`~pystella.ElementWiseMap` or its
            subclasses. Defaults to :class:`~pystella.ElementWiseMap`.

        :arg dt: A :class:`float` fixing the value of the timestep interval.
            Defaults to *None*, in which case it is not fixed at kernel creation.

        The remaining arguments are passed to :meth:`MapKernel.__init__` for
        each substep of the timestepper (i.e., see the documentation of
        :class:`~pystella.ElementWiseMap`).

        :raises TypeError: if *input* is not a :class:`dict`, a
            :class:`~pystella.Sector`, or a :class:`list` thereof.
        """

        single_stage = kwargs.pop('single_stage', False)
        from pystella import Sector
        if isinstance(input, Sector):
            self.rhs_dict = input.rhs_dict
            self.args = input.get_args(single_stage=single_stage)
        elif isinstance(input, list):
            self.args = [arg for s in input
                         for arg in s.get_args(single_stage=single_stage)]
            self.rhs_dict = dict(i for s in input for i in s.rhs_dict.items())
        elif isinstance(input, dict):
            self.rhs_dict = input
            self.args = kwargs.pop('args', ['...'])
        else:
            # previously fell through silently and failed later with an
            # opaque AttributeError on self.args
            raise TypeError(
                "input must be a dict, a Sector, or a list of Sectors, not %s"
                % type(input).__name__)

        # every stage kernel takes the timestep as a scalar argument
        self.args = self.args + [lp.ValueArg('dt')]

        dt = kwargs.pop('dt', None)
        fixed_parameters = kwargs.pop('fixed_parameters', dict())
        if dt is not None:
            fixed_parameters.update(dict(dt=dt))

        self.num_odes = len(self.rhs_dict.keys())
        self.steps = self.make_steps(**kwargs, fixed_parameters=fixed_parameters)

    def __call__(self, stage, queue=None, **kwargs):
        """
        Calls substep/stage ``stage`` (:attr:`steps[stage]`) of the timestepper,
        i.e., :func:`pystella.ElementWiseMap.__call__` for the kernel for
        substep/stage ``stage``.

        :arg stage: The substep/stage of the timestepper to call.

        :returns: The :class:`pyopencl.Event` associated with the kernel
            invocation.
        """

        evt, _ = self.steps[stage](queue, **kwargs)
        return evt
class RungeKuttaStepper(Stepper):
    """
    The base implementation of classical, explicit Runge-Kutta time steppers,
    which operate by storing and operating on multiple copies of each unknown
    array. Subclasses must provide an implementation of :meth:`step_statements`
    which returns a key-value pair implementing a specific substep of the
    particular timestepper.

    .. warning::

        To minimize the required storage per unknown (i.e., number of
        temporaries), the implementation of most subclasses overwrite arrays that
        are being read as input to compute right-hand sides. This means that any
        non-local (stencil-type) operations must be precomputed and cached
        *globally* (unless otherwise noted).

    :raises ValueError: if the keys of :attr:`rhs_dict` are not
        :class:`~pystella.Field`'s (or :class:`pymbolic.primitives.Subscript`'s
        thereof). This is required for :meth:`make_steps` to be able to prepend
        unknown arrays' subscripts with the index corresponding to the temporary
        storage axis.
    """

    def step_statements(self, stage, f, dt, rhs):
        # subclasses return a dict of {assignee: expression} statements
        # implementing substep ``stage`` of the scheme for unknown ``f``
        raise NotImplementedError

    def make_steps(self, MapKernel=ElementWiseMap, **kwargs):
        rhs = var('rhs')
        dt = var('dt')
        # q indexes the temporary-storage ("copy") axis of each unknown array
        q = var('q')
        fixed_parameters = kwargs.pop('fixed_parameters', dict())

        # temporary statements evaluating each right-hand side, with each
        # unknown read from storage copy q
        rhs_statements = {rhs[i]: Indexer(value, prepend_with=(q,))
                          for i, value in enumerate(self.rhs_dict.values())}

        steps = []
        for stage in range(self.num_stages):
            RK_dict = {}
            for i, f in enumerate(self.rhs_dict.keys()):
                # ensure that key is either a Field or a Subscript of a Field
                # so that Indexer can prepend the q index
                from pymbolic.primitives import Subscript
                key_has_field = False
                if isinstance(f, Field):
                    key_has_field = True
                elif isinstance(f, Subscript):
                    if isinstance(f.aggregate, Field):
                        key_has_field = True

                if not key_has_field:
                    raise ValueError("rhs_dict keys must be Field instances")

                statements = self.step_statements(stage, f, dt, rhs[i])
                for k, v in statements.items():
                    RK_dict[k] = v

            # right-hand sides read storage copy 0 on the first stage and
            # copy 1 on every subsequent stage
            fixed_parameters.update(q=0 if stage == 0 else 1)

            # NOTE(review): access-order checking is disabled, presumably
            # because stage updates read and write the same arrays -- confirm
            options = lp.Options(enforce_variable_access_ordered="no_check")
            step = MapKernel(map_dict=RK_dict, tmp_dict=rhs_statements,
                             args=self.args, **kwargs, options=options,
                             fixed_parameters=fixed_parameters)
            steps.append(step)

        return steps
+ """ + + num_stages = 4 + expected_order = 4 + + def step_statements(self, stage, f, dt, rhs): + fq = [Indexer(f, prepend_with=(q,)) for q in range(3)] + + if stage == 0: + return {fq[1]: fq[0] + dt/2 * rhs, + fq[2]: fq[0] + dt/6 * rhs} + elif stage == 1: + return {fq[1]: fq[0] + dt/2 * rhs, + fq[2]: fq[2] + dt/3 * rhs} + elif stage == 2: + return {fq[1]: fq[0] + dt * rhs, + fq[2]: fq[2] + dt/3 * rhs} + elif stage == 3: + return {fq[0]: fq[2] + dt/6 * rhs} + + +class RungeKutta3Heun(RungeKuttaStepper): + """ + Heun's three-stage, third-order Runge-Kutta method. + Requires unknown arrays to have temporary storage axes of length three. + """ + + num_stages = 3 + expected_order = 3 + + def step_statements(self, stage, f, dt, rhs): + fq = [Indexer(f, prepend_with=(q,)) for q in range(3)] + + if stage == 0: + return {fq[1]: fq[0] + dt/3 * rhs, + fq[2]: fq[0] + dt/4 * rhs} + elif stage == 1: + return {fq[1]: fq[0] + dt*2/3 * rhs} + elif stage == 2: + return {fq[0]: fq[2] + dt*3/4 * rhs} + + +class RungeKutta3Nystrom(RungeKuttaStepper): + """ + Nystrom's three-stage, third-order Runge-Kutta method. + Requires unknown arrays to have temporary storage axes of length three. + """ + + num_stages = 3 + expected_order = 3 + + def step_statements(self, stage, f, dt, rhs): + fq = [Indexer(f, prepend_with=(q,)) for q in range(3)] + + if stage == 0: + return {fq[1]: fq[0] + dt*2/3 * rhs, + fq[2]: fq[0] + dt*2/8 * rhs} + elif stage == 1: + return {fq[1]: fq[0] + dt*2/3 * rhs, + fq[2]: fq[2] + dt*3/8 * rhs} + elif stage == 2: + return {fq[0]: fq[2] + dt*3/8 * rhs} + + +class RungeKutta3Ralston(RungeKuttaStepper): + """ + Ralston's three-stage, third-order Runge-Kutta method. + Requires unknown arrays to have temporary storage axes of length three. 
+ """ + + num_stages = 3 + expected_order = 3 + + def step_statements(self, stage, f, dt, rhs): + fq = [Indexer(f, prepend_with=(q,)) for q in range(3)] + + if stage == 0: + return {fq[1]: fq[0] + dt/2 * rhs, + fq[2]: fq[0] + dt*2/9 * rhs} + elif stage == 1: + return {fq[1]: fq[0] + dt*3/4 * rhs, + fq[2]: fq[2] + dt*1/3 * rhs} + elif stage == 2: + return {fq[0]: fq[2] + dt*4/9 * rhs} + + +class RungeKutta3SSP(RungeKuttaStepper): + """ + A three-stage, third-order strong-stability preserving Runge-Kutta method. + Requires unknown arrays to have temporary storage axes of length two. + """ + + num_stages = 3 + expected_order = 3 + + def step_statements(self, stage, f, dt, rhs): + fq = [Indexer(f, prepend_with=(q,)) for q in range(3)] + + if stage == 0: + return {fq[1]: fq[0] + dt * rhs} + elif stage == 1: + return {fq[1]: 3/4 * fq[0] + 1/4 * fq[1] + dt/4 * rhs} + elif stage == 2: + return {fq[0]: 1/3 * fq[0] + 2/3 * fq[1] + dt*2/3 * rhs} + + +class RungeKutta2Midpoint(RungeKuttaStepper): + """ + The "midpoint" method, a two-stage, second-order Runge-Kutta method. + Requires unknown arrays to have temporary storage axes of length two. + + .. note:: + + Right-hand side operations *can* safely involve non-local computations + of unknown arrays for this method. 
+ """ + + num_stages = 2 + expected_order = 2 + + def step_statements(self, stage, f, dt, rhs): + fq = [Indexer(f, prepend_with=(q,)) for q in range(2)] + + if stage == 0: + return {fq[1]: fq[0] + dt/2 * rhs} + elif stage == 1: + return {fq[0]: fq[0] + dt * rhs} + + +# possible order reduction +class RungeKutta2Heun(RungeKuttaStepper): + num_stages = 2 + expected_order = 2 + + def step_statements(self, stage, f, dt, rhs): + fq = [Indexer(f, prepend_with=(q,)) for q in range(2)] + + if stage == 0: + return {fq[1]: fq[0] + dt * rhs, + fq[0]: fq[0] + dt/2 * rhs} + elif stage == 1: + return {fq[0]: fq[0] + dt/2 * rhs} + + +class RungeKutta2Ralston(RungeKuttaStepper): + """ + Ralstons's two-stage, second-order Runge-Kutta method. + Requires unknown arrays to have temporary storage axes of length two. + """ + + num_stages = 2 + expected_order = 2 + + def step_statements(self, stage, f, dt, rhs): + fq = [Indexer(f, prepend_with=(q,)) for q in range(2)] + + if stage == 0: + return {fq[1]: fq[0] + dt*2/3 * rhs, + fq[0]: fq[0] + dt/4 * rhs} + elif stage == 1: + return {fq[0]: fq[0] + dt*3/4 * rhs} + + +class LowStorageRKStepper(Stepper): + """ + The base implementation of low-storage, explicit Runge-Kutta time steppers, + which operate by storing and operating on a single copy of each unknown array, + plus an auxillary temporary array. + + The substeps are expressed in a standard form, drawing coefficients from + a subclass's provided values of :attr:`_A`, :attr:`_B`, and :attr:`_C`. + + .. automethod:: __init__ + """ + + _A = [] + _B = [] + _C = [] + + def make_steps(self, MapKernel=ElementWiseMap, **kwargs): + self.args = self.args + [lp.GlobalArg('k_tmp', shape=lp.auto)] + + rhs = var('rhs') + dt = var('dt') + # filter out indices for zero axes + # FIXME: Field.indices should never include offset, so that this can just + # replicate test_field.indices (rename to index_tuple?) 
+ test_field = list(self.rhs_dict.keys())[0] + from pymbolic.primitives import Subscript + if isinstance(test_field, Field): + num_indices = len(test_field.indices) + elif isinstance(test_field, Subscript): + if isinstance(test_field.aggregate, Field): + num_indices = len(test_field.aggregate.indices) + else: + num_indices = len(test_field.index_tuple) + else: + num_indices = 0 + + indices = ('i', 'j', 'k')[:num_indices] + k = Field('k_tmp', indices=indices) + + rhs_statements = {rhs[i]: Indexer(value) + for i, value in enumerate(self.rhs_dict.values())} + + steps = [] + for stage in range(self.num_stages): + RK_dict = {} + for i, key in enumerate(self.rhs_dict.keys()): + f = Indexer(key) + k_i = Indexer(k[i]) + RK_dict[k_i] = self._A[stage] * k_i + dt * rhs[i] + RK_dict[f] = f + self._B[stage] * k_i + + step = MapKernel(map_dict=RK_dict, tmp_dict=rhs_statements, + args=self.args, **kwargs) + steps.append(step) + + return steps + + def __init__(self, input, k_tmp, **kwargs): + """ + :arg k_tmp: The array used for temporary + calculations. Its outer-/left-most axis (i.e., the axis of largest + stride) must have length equal to the total number of unknown ODEs. + + .. note:: + + ``k_tmp`` may be replaced by inputting a :class:`pyopencl.CommandQueue` + in a future version of :mod:`pystella`. In this case, the creation of + this array would be done automatically. + + Otherwise identical to :func:`Stepper.__init__`. + """ + + super().__init__(input, single_stage=True, **kwargs) + self.k_tmp = k_tmp + + def __call__(self, stage, queue=None, **kwargs): + evt, _ = self.steps[stage](queue, k_tmp=self.k_tmp, **kwargs) + return evt + + +class LowStorageRK54(LowStorageRKStepper): + """ + A five-stage, fourth-order, low-storage Runge-Kutta method. 
class LowStorageRK54(LowStorageRKStepper):
    """
    A five-stage, fourth-order, low-storage Runge-Kutta method.

    See
    Carpenter, M.H., and Kennedy, C.A., Fourth-order-2N-storage
    Runge-Kutta schemes, NASA Langley Tech Report TM 109112, 1994
    """

    num_stages = 5
    expected_order = 4

    # _A multiplies the running k register, _B weights the solution update,
    # _C are the nominal stage times (not referenced in the code shown here)
    _A = [
        0,
        -567301805773 / 1357537059087,
        -2404267990393 / 2016746695238,
        -3550918686646 / 2091501179385,
        -1275806237668 / 842570457699,
    ]

    _B = [
        1432997174477 / 9575080441755,
        5161836677717 / 13612068292357,
        1720146321549 / 2090206949498,
        3134564353537 / 4481467310338,
        2277821191437 / 14882151754819,
    ]

    _C = [
        0,
        1432997174477 / 9575080441755,
        2526269341429 / 6820363962896,
        2006345519317 / 3224310063776,
        2802321613138 / 2924317926251,
    ]


class LowStorageRK3Williamson(LowStorageRKStepper):
    """
    A three-stage, third-order, low-storage Runge-Kutta method.

    See
    Williamson, J. H., Low-storage Runge-Kutta schemes,
    J. Comput. Phys., 35, 48-56, 1980
    """

    num_stages = 3
    expected_order = 3

    _A = [0, -5/9, -153/128]

    _B = [1/3, 15/16, 8/15]

    _C = [0, 4/9, 15/32]


class LowStorageRK3Inhomogeneous(LowStorageRKStepper):
    """
    A three-stage, third-order, low-storage Runge-Kutta method.
    """

    num_stages = 3
    expected_order = 3

    _A = [0, -17/32, -32/27]

    _B = [1/4, 8/9, 3/4]

    _C = [0, 15/32, 4/9]


# possible order reduction
class LowStorageRK3Symmetric(LowStorageRKStepper):
    num_stages = 3
    expected_order = 3

    _A = [0, -2/3, -1]

    _B = [1/3, 1, 1/2]

    _C = [0, 1/3, 2/3]


# possible order reduction
class LowStorageRK3PredictorCorrector(LowStorageRKStepper):
    num_stages = 3
    expected_order = 3

    _A = [0, -1/4, -4/3]

    _B = [1/2, 2/3, 1/2]

    _C = [0, 1/2, 1]


# intermediate quantities used to build the LowStorageRK3SSP coefficients
# below; NOTE(review): presumably obtained by solving the 2N-storage order
# conditions with c2 = .924574 fixed -- the derivation is not shown here
c2 = .924574
z1 = np.sqrt(36 * c2**4 + 36 * c2**3 - 135 * c2**2 + 84 * c2 - 12)
z2 = 2 * c2**2 + c2 - 2
z3 = 12 * c2**4 - 18 * c2**3 + 18 * c2**2 - 11 * c2 + 2
z4 = 36 * c2**4 - 36 * c2**3 + 13 * c2**2 - 8 * c2 + 4
z5 = 69 * c2**3 - 62 * c2**2 + 28 * c2 - 8
z6 = 34 * c2**4 - 46 * c2**3 + 34 * c2**2 - 13 * c2 + 2
B1 = c2
B2 = (12 * c2 * (c2 - 1) * (3 * z2 - z1) - (3 * z2 - z1)**2) \
    / (144 * c2 * (3 * c2 - 2) * (c2 - 1)**2)
B3 = - 24 * (3 * c2 - 2) * (c2 - 1)**2 \
    / ((3 * z2 - z1)**2 - 12 * c2 * (c2 - 1) * (3 * z2 - z1))
A2 = (- z1 * (6 * c2**2 - 4 * c2 + 1) + 3 * z3) \
    / ((2 * c2 + 1) * z1 - 3 * (c2 + 2) * (2 * c2 - 1)**2)
A3 = (- z4 * z1 + 108 * (2 * c2 - 1) * c2**5 - 3 * (2 * c2 - 1) * z5) \
    / (24 * z1 * c2 * (c2 - 1)**4 + 72 * c2 * z6 + 72 * c2**6 * (2 * c2 - 13))


class LowStorageRK3SSP(LowStorageRKStepper):
    """
    A three-stage, third-order, strong-stability preserving, low-storage
    Runge-Kutta method.
    """

    num_stages = 3
    expected_order = 3

    _A = [0, A2, A3]

    _B = [B1, B2, B3]

    _C = [0, B1, B1 + B2 * (A2 + 1)]


# convenience list of the public steppers; excludes the variants marked
# "possible order reduction" above
all_steppers = [RungeKutta4, RungeKutta3SSP, RungeKutta3Heun, RungeKutta3Nystrom,
                RungeKutta3Ralston, RungeKutta2Midpoint,
                RungeKutta2Ralston, LowStorageRK54,
                LowStorageRK3Williamson, LowStorageRK3Inhomogeneous,
                LowStorageRK3SSP]
from setuptools import setup, find_packages


# authoritative version in pytools/__init__.py
def find_git_revision(tree_root):
    """Return the HEAD commit hash of the git checkout at *tree_root*,
    or *None* if *tree_root* is not a git checkout or git fails.
    """
    # Keep this routine self-contained so that it can be copy-pasted into
    # setup.py.

    from os.path import join, exists, abspath
    tree_root = abspath(tree_root)

    # not a git checkout (e.g., building from an sdist): no revision available
    if not exists(join(tree_root, ".git")):
        return None

    from subprocess import Popen, PIPE, STDOUT
    p = Popen(["git", "rev-parse", "HEAD"], shell=False,
              stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True,
              cwd=tree_root)
    (git_rev, _) = p.communicate()

    import sys
    if sys.version_info >= (3,):
        # subprocess output is bytes on python 3
        git_rev = git_rev.decode()

    git_rev = git_rev.rstrip()

    retcode = p.returncode
    assert retcode is not None
    if retcode != 0:
        from warnings import warn
        warn("unable to find git revision")
        return None

    return git_rev


def write_git_revision(package_name):
    """Record the current git revision in ``<package_name>/_git_rev.py``."""
    from os.path import dirname, join
    dn = dirname(__file__)
    git_rev = find_git_revision(dn)

    with open(join(dn, package_name, "_git_rev.py"), "w") as outf:
        outf.write("GIT_REVISION = %s\n" % repr(git_rev))


# capture the revision at build/install time so the installed package can
# report which commit it was built from
write_git_revision("pystella")


setup(name="pystella",
      version="2019.5",
      description="A code generator for grid-based PDE solving on CPUs and GPUs",
      long_description=open("README.rst", "rt").read(),

      install_requires=[
          "numpy",
          "pyopencl",
          "loo.py",
      ],

      author="Zachary J Weiner",
      license="MIT",
      packages=find_packages(),
      )
substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +from time import time +import pyopencl as cl + + +def timer(kernel, ntime=200, nwarmup=2): + for i in range(nwarmup): + kernel() + + start = time() + for i in range(ntime): + evt = kernel() + + if isinstance(evt, cl.Event): + evt.wait() + + end = time() + + return (end - start) / ntime * 1e3 + + +def get_exec_arg_dict(): + """ + Interprets command line arguments (obtained from `sys.argv`) as key-value + pairs. Entries corresponding to values are passed to :func:`eval` and stored + as such, unless :func:`eval` raises an exception, in which case the string + input itself is stored. + + :returns: A :class:`dict` of the command-line arguments. + """ + + def eval_unless_str(string): + try: + x = eval(string) + except: # noqa: E722 + x = string + return x + + import sys + ll = sys.argv[1:] + return dict(zip(ll[::2], map(eval_unless_str, ll[1::2]))) diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..1490c2e --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,22 @@ +def pytest_addoption(parser): + parser.addoption("--grid_shape", action="store", default=(128,)*3) + parser.addoption("--proc_shape", action="store", default=(1,)*3) + + +def tuplify(string): + if isinstance(string, str): + return tuple(int(i) for i in string.split(',')) + else: + return string + + +def pytest_generate_tests(metafunc): + # This is called for every test. Only get/set command line arguments + # if the argument is specified in the list of test "fixturenames". 
+ grid_shape = metafunc.config.option.grid_shape + if 'grid_shape' in metafunc.fixturenames and grid_shape is not None: + metafunc.parametrize("grid_shape", [tuplify(grid_shape)]) + + proc_shape = metafunc.config.option.proc_shape + if 'proc_shape' in metafunc.fixturenames and proc_shape is not None: + metafunc.parametrize("proc_shape", [tuplify(proc_shape)]) diff --git a/test/test_decomp.py b/test/test_decomp.py new file mode 100644 index 0000000..9697249 --- /dev/null +++ b/test/test_decomp.py @@ -0,0 +1,196 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + + +import numpy as np +import pyopencl as cl +import pyopencl.clrandom as clr +import pyopencl.array as cla +import pystella as ps +import pytest + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +@pytest.mark.parametrize("h", [1, 2]) +@pytest.mark.parametrize("dtype", [np.float64, np.float32]) +@pytest.mark.parametrize("_grid_shape", [None, (128, 64, 32)]) +@pytest.mark.parametrize("pass_rank_shape", [True, False]) +def test_share_halos(ctx_factory, grid_shape, proc_shape, h, dtype, + _grid_shape, pass_rank_shape, timing=False): + if ctx_factory: + ctx = ctx_factory() + else: + ctx = ps.choose_device_and_make_context() + + queue = cl.CommandQueue(ctx) + grid_shape = _grid_shape or grid_shape + rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape)) + mpi = ps.DomainDecomposition( + proc_shape, h, rank_shape=(rank_shape if pass_rank_shape else None) + ) + + rng = clr.ThreefryGenerator(ctx, seed=12321) + data = rng.uniform(queue, tuple(Ni + 2*h for Ni in grid_shape), dtype).get() + data[:h, :, :] = data[-2*h:-h, :, :].copy() + data[-h:, :, :] = data[h:2*h, :, :].copy() + data[:, :h, :] = data[:, -2*h:-h, :].copy() + data[:, -h:, :] = data[:, h:2*h, :].copy() + data[:, :, :h] = data[:, :, -2*h:-h].copy() + data[:, :, -h:] = data[:, :, h:2*h].copy() + + subdata = np.empty(tuple(ni + 2*h for ni in rank_shape), dtype) + rank_slice = tuple(slice(ri * ni + h, (ri+1) * ni + h) + for ri, ni in zip(mpi.rank_tuple, rank_shape)) + subdata[h:-h, h:-h, h:-h] = data[rank_slice].copy() + + subdata_device = clr.rand(queue, tuple(ni + 2*h for ni in rank_shape), dtype) + cl.enqueue_copy(queue, subdata_device.data, subdata) + + mpi.share_halos(queue, subdata_device) + cl.enqueue_copy(queue, subdata, subdata_device.data) + + pencil_slice = tuple(slice(ri * ni, (ri+1) * ni + 2*h) + for ri, ni in zip(mpi.rank_tuple, rank_shape)) + assert (subdata == data[pencil_slice]).all(), \ + "rank %d %s has incorrect halo 
data \n" % (mpi.rank, mpi.rank_tuple) + + # test that can call with different-shaped input + if not pass_rank_shape: + subdata_device_new = clr.rand(queue, tuple(ni//2 + 2*h for ni in rank_shape), + dtype) + mpi.share_halos(queue, subdata_device_new) + + if timing: + from common import timer + t = timer(lambda: mpi.share_halos(queue, fx=subdata_device)) + if mpi.rank == 0: + print("share_halos took %.3f ms for grid_shape=%s, h=%d, proc_shape=%s" + % (t, str(grid_shape), h, str(proc_shape))) + + +@pytest.mark.parametrize("h", [1, 2]) +@pytest.mark.parametrize("dtype", [np.float64, np.float32]) +@pytest.mark.parametrize("_grid_shape", [None, (128, 64, 32)]) +def test_gather_scatter(ctx_factory, grid_shape, proc_shape, h, dtype, + _grid_shape, timing=False): + if ctx_factory: + ctx = ctx_factory() + else: + ctx = ps.choose_device_and_make_context() + + queue = cl.CommandQueue(ctx) + grid_shape = _grid_shape or grid_shape + rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape)) + mpi = ps.DomainDecomposition(proc_shape, h) + + rank_slice = tuple(slice(ri * ni, (ri+1) * ni) + for ri, ni in zip(mpi.rank_tuple, rank_shape)) + pencil_shape = tuple(ni + 2*h for ni in rank_shape) + + # create random data with same seed on all ranks + rng = clr.ThreefryGenerator(ctx, seed=12321) + data = rng.uniform(queue, grid_shape, dtype) + + # cl.Array -> cl.Array + subdata = cla.zeros(queue, pencil_shape, dtype) + mpi.scatter_array(queue, data, subdata, 0) + sub_h = subdata.get() + data_h = data.get() + assert (sub_h[h:-h, h:-h, h:-h] == data_h[rank_slice]).all() + + data_test = cla.zeros_like(data) + mpi.gather_array(queue, subdata, data_test, 0) + data_test_h = data_test.get() + if mpi.rank == 0: + assert (data_test_h == data_h).all() + + # np.ndarray -> np.ndarray + mpi.scatter_array(queue, data_h, sub_h, 0) + assert (sub_h[h:-h, h:-h, h:-h] == data_h[rank_slice]).all() + + mpi.gather_array(queue, sub_h, data_test_h, 0) + if mpi.rank == 0: + assert (data_test_h == 
data_h).all() + + # scatter cl.Array -> np.ndarray + sub_h[:] = 0 + mpi.scatter_array(queue, data, sub_h, 0) + assert (sub_h[h:-h, h:-h, h:-h] == data_h[rank_slice]).all() + + # gather np.ndarray -> cl.Array + data_test[:] = 0 + mpi.gather_array(queue, sub_h, data_test, 0) + data_test_h = data_test.get() + if mpi.rank == 0: + assert (data_test_h == data_h).all() + + # scatter np.ndarray -> cl.Array + subdata[:] = 0 + mpi.scatter_array(queue, data_h, subdata, 0) + sub_h = subdata.get() + assert (sub_h[h:-h, h:-h, h:-h] == data_h[rank_slice]).all() + + # gather cl.Array -> np.ndarray + data_test_h[:] = 0 + mpi.gather_array(queue, subdata, data_test_h, 0) + if mpi.rank == 0: + assert (data_test_h == data_h).all() + + if timing: + from common import timer + ntime = 25 + times = {} + + times['scatter cl.Array -> cl.Array'] = \ + timer(lambda: mpi.scatter_array(queue, data, subdata, 0), ntime=ntime) + times['scatter cl.Array -> np.ndarray'] = \ + timer(lambda: mpi.scatter_array(queue, data, sub_h, 0), ntime=ntime) + times['scatter np.ndarray -> cl.Array'] = \ + timer(lambda: mpi.scatter_array(queue, data_h, subdata, 0), ntime=ntime) + times['scatter np.ndarray -> np.ndarray'] = \ + timer(lambda: mpi.scatter_array(queue, data_h, sub_h, 0), ntime=ntime) + + times['gather cl.Array -> cl.Array'] = \ + timer(lambda: mpi.gather_array(queue, subdata, data, 0), ntime=ntime) + times['gather cl.Array -> np.ndarray'] = \ + timer(lambda: mpi.gather_array(queue, subdata, data_h, 0), ntime=ntime) + times['gather np.ndarray -> cl.Array'] = \ + timer(lambda: mpi.gather_array(queue, sub_h, data, 0), ntime=ntime) + times['gather np.ndarray -> np.ndarray'] = \ + timer(lambda: mpi.gather_array(queue, sub_h, data_h, 0), ntime=ntime) + + if mpi.rank == 0: + print("grid_shape=%s, h=%d, proc_shape=%s" + % (str(grid_shape), h, str(proc_shape))) + for key, val in times.items(): + print(key, 'took', '%.3f' % val, 'ms') + + +if __name__ == "__main__": + args = {'grid_shape': (256,)*3, 'proc_shape': 
(1,)*3, 'h': 2, + 'dtype': np.float64, '_grid_shape': None} + from common import get_exec_arg_dict + args.update(get_exec_arg_dict()) + test_share_halos(None, **args, pass_rank_shape=True, timing=True) + test_gather_scatter(None, **args, timing=True) diff --git a/test/test_derivs.py b/test/test_derivs.py new file mode 100644 index 0000000..d11dfd1 --- /dev/null +++ b/test/test_derivs.py @@ -0,0 +1,165 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + + +import numpy as np +import pyopencl as cl +import pyopencl.array as cla +import pyopencl.clmath as clm +import pystella as ps +import pytest + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +@pytest.mark.filterwarnings( + "ignore::pyopencl.characterize.CLCharacterizationWarning") +@pytest.mark.filterwarnings("ignore::loopy.diagnostic.LoopyAdvisory") +@pytest.mark.parametrize("h", [0, 1, 2, 3, 4]) +@pytest.mark.parametrize("dtype", [np.float64]) +@pytest.mark.parametrize("stream", [True, False]) +def test_gradient_laplacian(ctx_factory, grid_shape, proc_shape, h, dtype, + stream, timing=False): + if h == 0 and stream is True: + pytest.skip('no streaming spectral') + + if ctx_factory: + ctx = ctx_factory() + else: + ctx = ps.choose_device_and_make_context() + + queue = cl.CommandQueue(ctx) + rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape)) + mpi = ps.DomainDecomposition(proc_shape, h, rank_shape) + + L = (3, 5, 7) + dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape)) + dk = tuple(2 * np.pi / Li for Li in L) + + if h == 0: + def get_evals_1(k, dx): + return k + + def get_evals_2(k, dx): + return - k**2 + + fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype) + derivs = ps.SpectralGradientLaplacian(fft, dk) + else: + from pystella.derivs import FirstCenteredDifference, SecondCenteredDifference + get_evals_1 = FirstCenteredDifference(h).get_eigenvalues + get_evals_2 = SecondCenteredDifference(h).get_eigenvalues + if stream: + try: + derivs = ps.GradientLaplacian(mpi, h, dx, stream=stream) + except: # noqa + pytest.skip("StreamingStencil unavailable") + else: + derivs = ps.GradientLaplacian(mpi, h, dx, stream=False) + + pencil_shape = tuple(ni + 2*h for ni in rank_shape) + + # set up test data + fx_h = np.empty(pencil_shape, dtype) + kvec = np.array(dk) * np.array([-5, 4, -3]).astype(dtype) + xvec = np.meshgrid(*[dxi * np.arange(ri*ni, (ri+1)*ni) + for dxi, ri, ni in zip(dx, 
mpi.rank_tuple, rank_shape)], + indexing='ij') + + phases = sum(ki * xi for ki, xi in zip(kvec, xvec)) + if h > 0: + fx_h[h:-h, h:-h, h:-h] = np.sin(phases) + else: + fx_h[:] = np.sin(phases) + fx_cos = np.cos(phases) + + fx = cla.empty(queue, pencil_shape, dtype) + fx.set(fx_h) + + lap = cla.empty(queue, rank_shape, dtype) + grd = cla.empty(queue, (3,)+rank_shape, dtype) + + derivs(queue, fx=fx, lap=lap, grd=grd) + + eff_kmag_sq = sum(get_evals_2(kvec_i, dxi) for dxi, kvec_i in zip(dx, kvec)) + + lap_true = cla.to_device(queue, eff_kmag_sq * np.sin(phases)) + diff = clm.fabs(lap - lap_true) + + max_err = cla.max(diff) / cla.max(clm.fabs(lap_true)) + avg_err = cla.sum(diff) / cla.sum(clm.fabs(lap_true)) + + max_rtol = 1.e-11 if dtype == np.float64 else 3.e-4 + avg_rtol = 1.e-12 if dtype == np.float64 else 5.e-5 + + assert max_err < max_rtol and avg_err < avg_rtol, \ + "lap inaccurate for h=%d, grid_shape=%s, proc_shape=%s" \ + % (h, grid_shape, proc_shape) + + for i in range(3): + eff_k = get_evals_1(kvec[i], dx[i]) + + pdi_true = cla.to_device(queue, eff_k * fx_cos) + diff = clm.fabs(grd[i] - pdi_true) + + max_err = cla.max(diff) / cla.max(clm.fabs(pdi_true)) + avg_err = cla.sum(diff) / cla.sum(clm.fabs(pdi_true)) + + max_rtol = 1.e-12 if dtype == np.float64 else 1.e-5 + avg_rtol = 1.e-13 if dtype == np.float64 else 3.e-6 + + assert max_err < max_rtol and avg_err < avg_rtol, \ + "pd%d inaccurate for h=%d, grid_shape=%s, proc_shape=%s" \ + % (i, h, grid_shape, proc_shape) + + if timing: + from common import timer + + times = {} + times['gradient and laplacian'] = \ + timer(lambda: derivs(queue, fx=fx, lap=lap, grd=grd)) + times['gradient'] = \ + timer(lambda: derivs(queue, fx=fx, grd=grd)) + times['laplacian'] = timer(lambda: derivs(queue, fx=fx, lap=lap)) + times['pdx'] = timer(lambda: derivs(queue, fx=fx, pdx=grd[0])) + times['pdy'] = timer(lambda: derivs(queue, fx=fx, pdy=grd[1])) + times['pdz'] = timer(lambda: derivs(queue, fx=fx, pdz=grd[2])) + + if mpi.rank 
== 0: + print("grid_shape=%s, h=%d, proc_shape=%s" + % (str(grid_shape), h, str(proc_shape))) + for key, val in times.items(): + print(key, 'took', '%.3f' % val, 'ms') + + +if __name__ == "__main__": + args = {'grid_shape': (256,)*3, 'proc_shape': (1,)*3, 'dtype': np.float64, + 'h': 2} + from common import get_exec_arg_dict + args.update(get_exec_arg_dict()) + + for stream in [True, False]: + test_gradient_laplacian(None, **args, stream=stream, timing=True) + + args['h'] = 0 + test_gradient_laplacian(None, **args, stream=False, timing=True) diff --git a/test/test_dft.py b/test/test_dft.py new file mode 100644 index 0000000..39c7a92 --- /dev/null +++ b/test/test_dft.py @@ -0,0 +1,114 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + + +import numpy as np +import pyopencl as cl +import pyopencl.clrandom as clr +import pyopencl.array as cla +import pystella as ps +import pytest + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +@pytest.mark.parametrize("dtype", [np.float64, np.float32]) +def test_dft(ctx_factory, grid_shape, proc_shape, dtype, timing=False): + if ctx_factory: + ctx = ctx_factory() + else: + ctx = ps.choose_device_and_make_context() + + queue = cl.CommandQueue(ctx) + h = 1 + rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape)) + mpi = ps.DomainDecomposition(proc_shape, h, rank_shape) + + fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype) + grid_size = np.product(grid_shape) + + if proc_shape[0] * proc_shape[1] * proc_shape[2] == 1: + rng = clr.ThreefryGenerator(ctx, seed=12321) + fx = rng.uniform(queue, grid_shape, dtype) + 1.e-2 + fx1 = fx.get() + + fk = fft.dft(fx) + fk1 = fk.get() + fk_np = np.fft.rfftn(fx1) + + fx2 = fft.idft(fk).get() + fx_np = np.fft.irfftn(fk1) + + rtol = 1.e-11 if dtype == np.float64 else 2.e-3 + assert np.allclose(fx1, fx2 / grid_size, rtol=rtol, atol=0), \ + "IDFT(DFT(f)) != f for grid_shape=%s" % str(grid_shape) + + assert np.allclose(fk_np, fk1, rtol=rtol, atol=0), \ + "DFT disagrees with numpy for grid_shape=%s" % str(grid_shape) + + assert np.allclose(fx_np, fx2 / grid_size, rtol=rtol, atol=0), \ + "IDFT disagrees with numpy for grid_shape=%s" % str(grid_shape) + + fx_cl = cla.empty(queue, rank_shape, dtype) + pencil_shape = tuple(ni + 2*h for ni in rank_shape) + fx_cl_halo = cla.empty(queue, pencil_shape, dtype) + fx_np = np.empty(rank_shape, dtype) + fx_np_halo = np.empty(pencil_shape, dtype) + fk_cl = cla.empty(queue, fft.shape(True), fft.fk.dtype) + fk_np = np.empty(fft.shape(True), fft.fk.dtype) + + # FIXME: check that these actually produce the correct result + fx_types = {'cl': fx_cl, 'cl halo': fx_cl_halo, + 'np': fx_np, 'np halo': fx_np_halo, + 'None': None} + + 
fk_types = {'cl': fk_cl, 'np': fk_np, 'None': None} + + # run all of these to ensure no runtime errors even if no timing + if timing: + ntime = 20 + else: + ntime = 1 + + from common import timer + + if mpi.rank == 0: + print("N = %s" % str(grid_shape)) + + from itertools import product + for (a, input_), (b, output) in product(fx_types.items(), fk_types.items()): + t = timer(lambda: fft.dft(input_, output), ntime=ntime) + if mpi.rank == 0: + print("dft(%s, %s) took %.3f ms" % (a, b, t)) + + for (a, input_), (b, output) in product(fk_types.items(), fx_types.items()): + t = timer(lambda: fft.idft(input_, output), ntime=ntime) + if mpi.rank == 0: + print("idft(%s, %s) took %.3f ms" % (a, b, t)) + + +if __name__ == "__main__": + args = {'grid_shape': (256,)*3, 'proc_shape': (1,)*3, 'dtype': np.float64} + from common import get_exec_arg_dict + args.update(get_exec_arg_dict()) + test_dft(None, **args, timing=True) diff --git a/test/test_elementwise.py b/test/test_elementwise.py new file mode 100644 index 0000000..52c01aa --- /dev/null +++ b/test/test_elementwise.py @@ -0,0 +1,104 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
@pytest.mark.parametrize("dtype", [np.float64, np.float32])
def test_elementwise(ctx_factory, grid_shape, proc_shape, dtype, timing=False):
    """Check :class:`ElementWiseMap` kernels (with temporaries and a
    single-instruction map) against the same arithmetic done with
    pyopencl array operations.

    :arg timing: when True, also print kernel time and effective bandwidth.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))

    from pymbolic import var
    a = var('a')
    b = var('b')

    from pystella.field import Field
    x = Field('x')
    y = Field('y')
    z = Field('z')

    # temporaries a[0], a[1], b feed the main assignments below
    tmp_dict = {a[0]: x + 2,
                a[1]: 2 + x * y,
                b: x + y / 2}
    map_dict = {x: a[0] * y**2 * x + a[1] * b,
                z: z + a[1] * b}
    single_insn = {x: y + z}

    ew_map = ps.ElementWiseMap(map_dict, tmp_dict=tmp_dict)

    x = clr.rand(queue, rank_shape, dtype=dtype)
    y = clr.rand(queue, rank_shape, dtype=dtype)
    z = clr.rand(queue, rank_shape, dtype=dtype)

    # reference values computed with pyopencl array arithmetic
    # (before the kernel mutates x and z in place)
    a0 = x + 2
    a1 = 2 + x * y
    b = x + y / 2
    x_true = a0 * y**2 * x + a1 * b
    z_true = z + a1 * b

    ew_map(queue, x=x, y=y, z=z)

    rtol = 5.e-14 if dtype == np.float64 else 1.e-5

    # fixed misspelled assertion messages ("innaccurate" -> "inaccurate")
    assert np.allclose(x.get(), x_true.get(), rtol=rtol, atol=0), \
        "x inaccurate for grid_shape=%s, proc_shape=%s" \
        % (str(grid_shape), str(proc_shape))

    assert np.allclose(z.get(), z_true.get(), rtol=rtol, atol=0), \
        "z inaccurate for grid_shape=%s, proc_shape=%s" \
        % (str(grid_shape), str(proc_shape))

    # test success of single instruction
    ew_map_single = ps.ElementWiseMap(single_insn)
    ew_map_single(queue, x=x, y=y, z=z)

    assert np.allclose(x.get(), y.get() + z.get(), rtol=rtol, atol=0), \
        "x inaccurate for grid_shape=%s, proc_shape=%s" \
        % (str(grid_shape), str(proc_shape))

    if timing:
        from common import timer
        t = timer(lambda: ew_map(queue, x=x, y=y, z=z)[0])
        print("elementwise map took %.3f ms for grid_shape=%s, proc_shape=%s"
              % (t, str(grid_shape), str(proc_shape)))
        # 5 global arrays touched per site: read x, y, z, write x, z
        print("Bandwidth = %.1f GB/s" % (5 * x.nbytes/1024**3 / t * 1000))
+""" + + +import numpy as np +import pyopencl as cl +import pyopencl.clrandom as clr +import pystella as ps +import pytest +# pylint: disable=no-member + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +@pytest.mark.parametrize("h", [1, 2]) +@pytest.mark.parametrize("dtype", [np.float64, np.float32]) +def test_scalar_energy(ctx_factory, grid_shape, proc_shape, h, dtype, timing=False): + if ctx_factory: + ctx = ctx_factory() + else: + ctx = ps.choose_device_and_make_context() + + queue = cl.CommandQueue(ctx) + rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape)) + mpi = ps.DomainDecomposition(proc_shape, h, rank_shape) + + grid_size = np.product(grid_shape) + + nscalars = 2 + + def potential(f): + phi, chi = f[0], f[1] + return 1/2 * phi**2 + 1/2 * chi**2 + 1/2 * phi**2 * chi**2 + + scalar_sector = ps.ScalarSector(nscalars, potential=potential) + scalar_energy = ps.Reduction(mpi, scalar_sector, + rank_shape=rank_shape, grid_size=grid_size, h=h) + + pencil_shape = tuple(ni+2*h for ni in rank_shape) + f = clr.rand(queue, (nscalars,)+pencil_shape, dtype) + dfdt = clr.rand(queue, (nscalars,)+pencil_shape, dtype) + lap = clr.rand(queue, (nscalars,)+rank_shape, dtype) + + energy = scalar_energy(queue, f=f, dfdt=dfdt, lap_f=lap, a=np.array(1.)) + + kin_test = [] + grad_test = [] + for fld in range(nscalars): + df_h = dfdt[fld].get() + rank_sum = np.sum(df_h[h:-h, h:-h, h:-h]**2) + kin_test.append(1/2 * mpi.allreduce(rank_sum) / grid_size) + + f_h = f[fld].get() + lap_h = lap[fld].get() + + rank_sum = np.sum(- f_h[h:-h, h:-h, h:-h] * lap_h) + grad_test.append(1/2 * mpi.allreduce(rank_sum) / grid_size) + + energy_test = {} + energy_test['kinetic'] = np.array(kin_test) + energy_test['gradient'] = np.array(grad_test) + + phi = f[0].get()[h:-h, h:-h, h:-h] + chi = f[1].get()[h:-h, h:-h, h:-h] + pot_rank = np.sum(potential([phi, chi])) + energy_test['potential'] = np.array(mpi.allreduce(pot_rank) / 
grid_size) + + rtol = 1.e-14 if dtype == np.float64 else 1.e-5 + + for key, value in energy.items(): + assert np.allclose(value, energy_test[key], rtol=rtol, atol=0), \ + "%s energy inaccurate for nscalars=%d, grid_shape=%s, proc_shape=%s" \ + % (key, nscalars, str(grid_shape), str(proc_shape)) + + if timing: + from common import timer + t = timer(lambda: scalar_energy(queue, a=np.array(1.), + f=f, dfdt=dfdt, lap_f=lap)) + if mpi.rank == 0: + print("scalar energy took " + "%.3f ms for nscalars=%d, grid_shape=%s, proc_shape=%s" + % (t, nscalars, str(grid_shape), str(proc_shape))) + + +if __name__ == "__main__": + args = {'grid_shape': (256,)*3, 'proc_shape': (1,)*3, + 'dtype': np.float64, 'h': 2} + from common import get_exec_arg_dict + args.update(get_exec_arg_dict()) + test_scalar_energy(None, **args, timing=True) diff --git a/test/test_examples.py b/test/test_examples.py new file mode 100644 index 0000000..4820250 --- /dev/null +++ b/test/test_examples.py @@ -0,0 +1,61 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + + +import os +import subprocess +import pytest + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +examples = { + 'examples/phi_chi.py': 2.5e-7, +} + + +@pytest.mark.parametrize("filename, expected", examples.items()) +def test_examples(ctx_factory, grid_shape, proc_shape, filename, expected): + if proc_shape[0] * proc_shape[1] * proc_shape[2] > 1: + pytest.skip('run examples on only one rank') + + result = subprocess.run(['python', filename, 'end_time', '1'], + stdout=subprocess.PIPE) + + assert result.returncode == 0, '%s failed' % filename + + from glob import glob + from h5py import File + files = sorted(glob('20*.h5')) + f = File(files[-1], 'r') + constraint = f['energy/constraint'][-1] + print(filename, constraint) + f.close() + os.remove(files[-1]) + + assert constraint < expected, '%s constraint is wrong' % filename + + +if __name__ == "__main__": + args = {'grid_shape': (256,)*3, 'proc_shape': (1,)*3} + for example, expected in examples.items(): + test_examples(None, **args, filename=example, expected=expected) diff --git a/test/test_expansion.py b/test/test_expansion.py new file mode 100644 index 0000000..6bc3f09 --- /dev/null +++ b/test/test_expansion.py @@ -0,0 +1,82 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do 
@pytest.mark.parametrize("dtype", [np.float64])
@pytest.mark.parametrize("Stepper", [ps.RungeKutta4, ps.LowStorageRK54])
def test_expansion(ctx_factory, proc_shape, dtype, Stepper, timing=False):
    """Integrate FLRW expansion for several equations of state and compare
    the scale factor against the exact power-law solution, checking the
    Friedmann constraint as well.
    """
    if proc_shape != (1, 1, 1):
        pytest.skip("test expansion only on one rank")

    def exact_a(w, t):
        # exact power-law scale factor for constant equation of state w
        x = (1 + 3*w)
        return (x*(t/np.sqrt(3) + 2/x))**(2/x)/2**(2/x)

    from pystella.step import LowStorageRKStepper
    is_low_storage = LowStorageRKStepper in Stepper.__bases__

    for w in [0, 1/3, 1/2, 1, -1/4]:
        def energy(a):
            return a**(-1-3*w)

        def pressure(a):
            return w * energy(a)

        t = 0
        dt = .005
        expand = ps.Expansion(energy(1.), Stepper, mpl=np.sqrt(8.*np.pi))

        while t <= 10. - dt:
            for stage in range(expand.stepper.num_stages):
                # low-storage steppers keep a single copy; classical RK
                # reads slot 0 on the first stage and slot 1 afterwards
                if is_low_storage:
                    idx = 0
                else:
                    idx = 0 if stage == 0 else 1
                expand.step(stage, energy(expand.a[idx]),
                            pressure(expand.a[idx]), dt)
            t += dt

        idx = () if is_low_storage else (0)

        order = expand.stepper.expected_order
        rtol = dt**order

        print(order,
              w,
              expand.a[idx]/exact_a(w, t) - 1,
              expand.constraint(energy(expand.a[idx])))

        assert np.allclose(expand.a[idx], exact_a(w, t), rtol=rtol, atol=0), \
            "FLRW solution inaccurate for w=%f" % (w)

        assert expand.constraint(energy(expand.a[idx])) < rtol, \
            "FLRW solution disobeying constraint for w=%f" % (w)
def test_field(proc_shape):
    """Check that :class:`Field` indexing, offsets, subscripts, prepends,
    and shifts all produce the expected indexed expressions."""
    if proc_shape != (1, 1, 1):
        pytest.skip("test field only on one rank")

    # basic halo offset
    assert ps.Indexer(ps.Field('y', offset='h')) \
        == parse("y[i + h, j + h, k + h]")

    # custom index names
    assert ps.Indexer(ps.Field('y', offset='h', indices=('a', 'b', 'c'))) \
        == parse("y[a + h, b + h, c + h]")

    # systematically cover: prepends honored/ignored, subscripts passed via
    # the field name vs. via __getitem__, and their combination
    for ignore, pre in [(True, ""), (False, "0, 1, ")]:
        for name, trail in [("y", ""), ("y[4, 5]", "4, 5, ")]:
            for subscript, mid in [(None, ""), ((2, 3), "2, 3, ")]:
                fld = ps.Field(name, ignore_prepends=ignore)
                expr = fld if subscript is None else fld[subscript]
                expected = "y[%s%s%si, j, k]" % (pre, mid, trail)
                assert ps.Indexer(expr, prepend_with=(0, 1)) == parse(expected)

    # shifts combine with per-axis offsets
    fld = ps.Field('y', offset=('hx', 'hy', 'hz'))
    assert ps.Indexer(fld.shift((1, 2, 3))) \
        == parse("y[i + hx + 1, j + hy + 2, k + hz + 3]")

    # offsets and shift entries may be pymbolic variables
    fld = ps.Field('y', offset=('hx', var('hy'), 'hz'))
    assert ps.Indexer(fld.shift((1, 2, var('a')))) \
        == parse("y[i + hx + 1, j + hy + 2, k + hz + a]")
equal = True + for x in a: + equal *= x in b + for x in b: + equal *= x in a + return equal + + expressions = {x: y, y: x * z} + args = get_field_args(expressions) + assert lists_equal(args, true_args) + + expressions = x * y + z + args = get_field_args(expressions) + assert lists_equal(args, true_args) + + expressions = [x, y, y * z**2] + args = get_field_args(expressions) + assert lists_equal(args, true_args) + + +def test_sympy_interop(proc_shape): + if proc_shape != (1, 1, 1): + pytest.skip("test field only on one rank") + + from pystella.field.sympy import pymbolic_to_sympy, sympy_to_pymbolic + import sympy as sym + + f = ps.Field('f', offset='h') + g = ps.Field('g', offset='h') + + expr = f[0]**2 * g + 2 * g[1] * f + sympy_expr = pymbolic_to_sympy(expr) + new_expr = sympy_to_pymbolic(sympy_expr) + sympy_expr_2 = pymbolic_to_sympy(new_expr) + assert sym.simplify(sympy_expr - sympy_expr_2) == 0, \ + "sympy <-> pymbolic conversion not invertible" + + # from pymbolic.functions import fabs, exp, exmp1 + fabs = parse('math.fabs') + exp = parse('math.exp') + expm1 = parse('math.expm1') + x = sym.Symbol('x') + + expr = sym.Abs(x) + assert sympy_to_pymbolic(expr) == fabs(var('x')) + + expr = sym.exp(x) + assert sympy_to_pymbolic(expr) == exp(var('x')) + + expr = sym.Function('expm1')(x) + assert sympy_to_pymbolic(expr) == expm1(var('x')) + + expr = sym.Function('aaa')(x) + from pymbolic.primitives import Call, Variable + assert sympy_to_pymbolic(expr) == Call(Variable('aaa'), (Variable('x'),)) + + +if __name__ == "__main__": + test_field((1, 1, 1)) + test_dynamic_field((1, 1, 1)) + test_field_diff((1, 1, 1)) + test_get_field_args((1, 1, 1)) + test_sympy_interop((1, 1, 1)) diff --git a/test/test_multigrid.py b/test/test_multigrid.py new file mode 100644 index 0000000..42aca37 --- /dev/null +++ b/test/test_multigrid.py @@ -0,0 +1,122 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any 
@pytest.mark.filterwarnings(
    "ignore::pyopencl.characterize.CLCharacterizationWarning")
@pytest.mark.filterwarnings("ignore::loopy.diagnostic.LoopyAdvisory")
@pytest.mark.filterwarnings("ignore::loopy.diagnostic.ParameterFinderWarning")
@pytest.mark.parametrize("h", [1])
@pytest.mark.parametrize("dtype", [np.float64])
@pytest.mark.parametrize("Solver", [NewtonIterator])
@pytest.mark.parametrize("MG", [FullApproximationScheme, MultiGridSolver])
def test_multigrid(ctx_factory, grid_shape, proc_shape, h, dtype, Solver, MG,
                   timing=False):
    """Run V-cycles of a multigrid scheme on a Poisson and a Helmholtz
    problem with random zero-mean sources and check residual convergence.

    :arg Solver: the relaxation/iteration class used on each level.
    :arg MG: the multigrid driver (FAS or standard multigrid).
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = 10
    dx = L / grid_shape[0]

    # np.product is deprecated (removed in NumPy 2.0); np.prod is canonical
    statistics = ps.FieldStatistics(mpi, h, rank_shape=rank_shape,
                                    grid_size=np.prod(grid_shape))

    def get_laplacian(f):
        # symbolic centered-difference laplacian of f at stencil width h
        from pystella.derivs import _lap_coefs, centered_diff
        lap_coefs = _lap_coefs[h]
        from pymbolic import var
        return sum([centered_diff(f, lap_coefs, direction=mu, order=2)
                    for mu in range(1, 4)]) / var('dx')**2

    test_problems = {}

    from pystella import Field
    # Poisson: lap(f) = rho
    f = Field('f', offset='h')
    rho = Field('rho', offset='h')
    test_problems[f] = (get_laplacian(f), rho)

    # Helmholtz: lap(f2) - f2 = rho2
    f = Field('f2', offset='h')
    rho = Field('rho2', offset='h')
    test_problems[f] = (get_laplacian(f) - f, rho)

    solver = Solver(mpi, queue, test_problems, h=h, dtype=dtype,
                    fixed_parameters=dict(omega=1/2))
    mg = MG(solver=solver, h=h, dtype=dtype)

    def zero_mean_array():
        # random field with its global mean subtracted, halos synchronized
        f0 = clr.rand(queue, grid_shape, dtype)
        f = clr.rand(queue, tuple(ni + 2*h for ni in rank_shape), dtype)
        mpi.scatter_array(queue, f0, f, root=0)
        avg = statistics(f)['mean']
        f = f - avg
        mpi.share_halos(queue, f)
        return f

    f = zero_mean_array()
    rho = zero_mean_array()

    f2 = zero_mean_array()
    rho2 = zero_mean_array()

    poisson_errs = []
    helmholtz_errs = []
    num_v_cycles = 15 if MG == MultiGridSolver else 10
    for _ in range(num_v_cycles):
        errs = mg(mpi, queue, dx0=dx, f=f, rho=rho, f2=f2, rho2=rho2)
        poisson_errs.append(errs[-1][-1]['f'])
        helmholtz_errs.append(errs[-1][-1]['f2'])

    for name, cycle_errs in zip(['poisson', 'helmholtz'],
                                [poisson_errs, helmholtz_errs]):
        tol = 1.e-6 if MG == MultiGridSolver else 1.e-15
        # require the last two cycles to have converged (the last tightly)
        assert cycle_errs[-1][1] < tol and cycle_errs[-2][1] < 10*tol, \
            "multigrid solution to %s eqn is inaccurate for " \
            "grid_shape=%s, h=%d, proc_shape=%s" \
            % (name, str(grid_shape), h, str(proc_shape))
+""" + + +import numpy as np +import pyopencl as cl +import pyopencl.clmath as clm +import pyopencl.array as cla +import pystella as ps +from pystella.derivs import FirstCenteredDifference, SecondCenteredDifference +from pystella.fourier import gDFT +import pytest + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + + +@pytest.mark.parametrize("h", [1, 2, 3, 4]) +@pytest.mark.parametrize("dtype", [np.float64]) +def test_effective_momenta(ctx_factory, grid_shape, proc_shape, h, dtype): + L = 10. + N = 128 + dx = 10 / N + dk = 2 * np.pi / L + k = np.linspace(-N//2+1, N//2+1, 100) + kmag = dk * k + + diff = 0 + stencil = FirstCenteredDifference(h) + for i, coef in stencil.coefs.items(): + x = dx * i + diff += coef * np.exp(1j * kmag * x) + diff += - coef * np.exp(- 1j * kmag * x) + + k_diff = np.real(diff / dx / 1j) + eff_k = stencil.get_eigenvalues(kmag, dx) + + assert np.max(np.abs(k_diff/eff_k - 1)) < 1.e-14 + + diff = 0 + stencil = SecondCenteredDifference(h) + for i, coef in stencil.coefs.items(): + x = dx * i + diff += coef * np.exp(1j * kmag * x) + if i > 0: + diff += coef * np.exp(- 1j * kmag * x) + + k_diff = np.real(diff / dx**2) + eff_k = stencil.get_eigenvalues(kmag, dx) + + assert np.max(np.abs(k_diff/eff_k - 1)) < 1.e-11 + + +def divergence_error(pdx): + div = clm.fabs(sum([pdx[mu] for mu in range(3)]))**2 + norm = sum([pdx[mu]**2 for mu in range(3)]) + div_norm = clm.sqrt(div) / clm.sqrt(norm) + max_err = cla.max(div_norm).get() + l2_err = cla.sum(div_norm).get() / div.size + return max_err, l2_err + + +def spectral_divergence_error(vector): + if isinstance(vector, cla.Array): + vector = vector.get() + if isinstance(vector, list): + N = vector[0].shape[0] + else: + N = vector.shape[1] + + pts = np.concatenate([np.arange(0, N//2+1), np.arange(-N//2+1, 0)]) + pts = pts.astype(np.float64) + pts[N//2] = 0. 
# Nyquist modes have zero first derivative + kvecs = np.meshgrid(pts, pts, pts[:N//2+1], indexing='ij') + + div = sum([kvecs[mu] * vector[mu] for mu in range(3)]) + norm = sum([np.abs(kvecs[mu] * vector[mu])**2 for mu in range(3)]) + div_norm = np.abs(div[norm != 0] / np.sqrt(norm[norm != 0])) + + # filter out modes where norm is tiny but not zero + div_norm = div_norm[div_norm < .99] + + max_err = np.max(div_norm) + l2_err = np.sum(div_norm) / div_norm.size + return max_err, l2_err + + +def is_hermitian(fk): + if isinstance(fk, cla.Array): + fk = fk.get() + + grid_shape = list(fk.shape) + grid_shape[-1] = 2 * (grid_shape[-1] - 1) + pos = [np.arange(0, Ni//2+1) for Ni in grid_shape] + neg = [np.concatenate([np.array([0]), np.arange(Ni-1, Ni//2-1, -1)]) + for Ni in grid_shape] + + test = np.array([]) + for k in [0, grid_shape[-1]//2]: + for n, p in zip(neg[0], pos[0]): + test = np.append(test, np.allclose(fk[n, neg[1], k], + np.conj(fk[p, pos[1], k]), + atol=0, rtol=1.e-12)) + test = np.append(test, np.allclose(fk[p, neg[1], k], + np.conj(fk[n, pos[1], k]), + atol=0, rtol=1.e-12)) + for n, p in zip(neg[1], pos[1]): + test = np.append(test, np.allclose(fk[neg[0], n, k], + np.conj(fk[pos[0], p, k]), + atol=0, rtol=1.e-12)) + test = np.append(test, np.allclose(fk[neg[0], p, k], + np.conj(fk[pos[0], n, k]), + atol=0, rtol=1.e-12)) + + for i in [0, grid_shape[0]//2]: + for j in [0, grid_shape[1]//2]: + for k in [0, grid_shape[2]//2]: + test = np.append(test, [np.abs(np.imag(fk[i, j, k])) < 1.e-15]) + return test.all() + + +def make_data(queue, fft): + kshape = fft.shape(True) + data = np.random.rand(*kshape) + 1j * np.random.rand(*kshape) + if isinstance(fft, gDFT): + from pystella.fourier.rayleigh import make_hermitian + data = make_hermitian(data).astype(np.complex128) + + data = fft.zero_corner_modes(data) + return cla.to_device(queue, data) + + +@pytest.mark.filterwarnings( + "ignore::pyopencl.characterize.CLCharacterizationWarning") 
@pytest.mark.filterwarnings(
    "ignore::pyopencl.characterize.CLCharacterizationWarning")
@pytest.mark.filterwarnings("ignore::loopy.diagnostic.LoopyAdvisory")
@pytest.mark.parametrize("h", [0, 2])
@pytest.mark.parametrize("pol", [False, True])
@pytest.mark.parametrize("dtype", [np.float64])
def test_vector_projector(ctx_factory, grid_shape, proc_shape, h, dtype, pol,
                          timing=False):
    """
    Test the vector projector: with pol=True, check that pol->vec->pol is the
    identity (and preserves hermiticity); with pol=False, check that
    transversified vectors are divergence-free, spectrally (h=0) or via
    finite-difference derivatives (h>0).
    """
    if proc_shape[0] * proc_shape[1] * proc_shape[2] > 1 and h == 0:
        pytest.skip("can't test continuum projectors on multiple ranks yet")

    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = (10,)*3
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
    cdtype = fft.cdtype
    # h > 0 projects with the stencil's effective momenta; h = 0 with the
    # continuum momenta k themselves
    if h > 0:
        stencil = FirstCenteredDifference(h)
        project = ps.Projector(fft, stencil.get_eigenvalues)
    else:
        project = ps.Projector(fft, lambda k, dx: k)

    k_shape = fft.shape(True)
    vector = cla.empty(queue, (3,)+k_shape, cdtype)

    if pol:
        plus = make_data(queue, fft).astype(cdtype)
        minus = make_data(queue, fft).astype(cdtype)

        # NOTE(review): vector was already allocated identically just above;
        # this second allocation looks redundant
        vector = cla.empty(queue, (3,)+k_shape, cdtype)
        project.pol_to_vec(queue, plus, minus, vector)

        if isinstance(fft, gDFT):
            for i in range(3):
                assert is_hermitian(vector[i]), \
                    "pol->vec projection is non-hermitian for grid_shape=%s, h=%d" \
                    % (str(grid_shape), h)

        plus1 = cla.zeros_like(plus)
        minus1 = cla.zeros_like(minus)

        project.vec_to_pol(queue, plus1, minus1, vector)

        if isinstance(fft, gDFT):
            assert is_hermitian(plus1), \
                "plus polarization is not hermitian for grid_shape=%s, h=%d" % \
                (str(grid_shape), h)
            assert is_hermitian(minus1), \
                "minus polarization is not hermitian for grid_shape=%s, h=%d" % \
                (str(grid_shape), h)

        # round trip must reproduce the input polarizations
        assert np.allclose(plus1.get(), plus.get(), atol=0., rtol=1.e-11) and \
            np.allclose(minus1.get(), minus.get(), atol=0., rtol=1.e-11), \
            "pol->vec->pol is not identity mapping for grid_shape=%s, h=%d" % \
            (str(grid_shape), h)

    else:
        for mu in range(3):
            vector[mu] = make_data(queue, fft).astype(cdtype)

        # apply twice to ensure smallness
        project.transversify(queue, vector)
        project.transversify(queue, vector)

        # h=0 performs "continuum" projection
        if h == 0:
            max_err, l2_err = spectral_divergence_error(vector)
            max_rtol = 1.e-12 if dtype == np.float64 else 1.e-4
            l2_rtol = 1.e-14 if dtype == np.float64 else 1.e-6
        else:
            # transform to position space and measure the stencil divergence
            vector_x = cla.empty(queue, (3,)+tuple(ni+2*h for ni in rank_shape), dtype)
            pdx = cla.empty(queue, (3,)+rank_shape, dtype)

            derivs = ps.GradientLaplacian(mpi, h, dx)

            for mu in range(3):
                fft.idft(vector[mu], vector_x[mu])

            derivs(queue, fx=vector_x[0], pdx=pdx[0])
            derivs(queue, fx=vector_x[1], pdy=pdx[1])
            derivs(queue, fx=vector_x[2], pdz=pdx[2])

            max_err, l2_err = divergence_error(pdx)
            max_rtol = 1.e-10 if dtype == np.float64 else 1.e-4
            l2_rtol = 1.e-14 if dtype == np.float64 else 1.e-6

        assert max_err < max_rtol and l2_err < l2_rtol, \
            "%s projection result is not transverse for grid_shape=%s, h=%d" % \
            ("pol_to_vec" if pol else "transversify", str(grid_shape), h)

    if timing:
        from common import timer
        ntime = 10
        if pol:
            t = timer(lambda: project.pol_to_vec(queue, plus, minus, vector),
                      ntime=ntime)
        else:
            t = timer(lambda: project.transversify(queue, vector), ntime=ntime)
        print("%s took %.3f ms for grid_shape=%s"
              % ("pol_to_vec" if pol else "transversify", t, str(grid_shape)))


def tensor_id(i, j):
    # map a symmetric index pair (i, j), with i, j in 1..3, to a flat index
    # 0..5 into the 6-component symmetric-tensor storage
    a = i if i <= j else j
    b = j if i <= j else i
    return (7 - a) * a // 2 - 4 + b


@pytest.mark.filterwarnings(
    "ignore::pyopencl.characterize.CLCharacterizationWarning")
@pytest.mark.filterwarnings("ignore::loopy.diagnostic.LoopyAdvisory")
@pytest.mark.parametrize("h", [0, 2])
@pytest.mark.parametrize("pol", [False, True])
@pytest.mark.parametrize("dtype", [np.float64])
def test_tensor_projector(ctx_factory, grid_shape, proc_shape, h, dtype, pol,
                          timing=False):
    """
    Test the transverse-traceless projector on a random symmetric tensor:
    check hermiticity, tracelessness, and transversality of each row,
    spectrally (h=0) or via finite-difference derivatives (h>0).
    """
    if proc_shape[0] * proc_shape[1] * proc_shape[2] > 1 and h == 0:
        pytest.skip("can't test continuum projectors on multiple ranks yet")
    if pol:
        pytest.skip("No tensor polarization projector yet")

    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    L = (10,)*3
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)
    cdtype = fft.cdtype
    if h > 0:
        stencil = FirstCenteredDifference(h)
        project = ps.Projector(fft, stencil.get_eigenvalues)
    else:
        project = ps.Projector(fft, lambda k, dx: k)

    k_shape = fft.shape(True)
    hij = cla.empty(queue, shape=(6,)+k_shape, dtype=cdtype)

    if pol:
        # unreachable for now: the pol case is skipped above
        pass
    else:
        for mu in range(6):
            hij[mu] = make_data(queue, fft).astype(cdtype)

        project.transverse_traceless(queue, hij)

        hij_h = hij.get()

        if isinstance(fft, gDFT):
            for i in range(6):
                assert is_hermitian(hij_h[i]), \
                    "TT projection is non-hermitian for grid_shape=%s, h=%d" \
                    % (str(grid_shape), h)

        # relative trace, skipping zero-norm modes and near-degenerate ones
        trace = sum([hij_h[tensor_id(i, i)] for i in range(1, 4)])
        tracenorm = np.sqrt(sum([np.abs(hij_h[tensor_id(i, i)])**2
                                 for i in range(1, 4)]))

        trace = np.abs(trace[tracenorm != 0]) / tracenorm[tracenorm != 0]
        trace = trace[trace < .9]
        max_err = np.max(trace)
        l2_err = np.sum(trace) / trace.size

        max_rtol = 1.e-9 if dtype == np.float64 else 1.e-4
        l2_rtol = 1.e-14 if dtype == np.float64 else 1.e-6

        assert max_err < max_rtol and l2_err < l2_rtol, \
            "TT projected tensor isn't traceless for grid_shape=%s, h=%d" \
            % (str(grid_shape), h)

        # h=0 performs "continuum" projection
        if h == 0:
            # each row h_{i,:} of the tensor must itself be transverse
            for i in range(1, 4):
                vector_h = [hij_h[tensor_id(i, j)] for j in range(1, 4)]
                max_err, l2_err = spectral_divergence_error(vector_h)
                max_rtol = 1.e-9 if dtype == np.float64 else 1.e-4
                l2_rtol = 1.e-14 if dtype == np.float64 else 1.e-6

                assert max_err < max_rtol and l2_err < l2_rtol, \
                    "TT projection is not transverse for grid_shape=%s, h=%d" \
                    % (str(grid_shape), h)

        else:
            vector_x = cla.empty(queue, (3,)+tuple(ni+2*h for ni in rank_shape), dtype)
            pdx = cla.empty(queue, (3,)+rank_shape, dtype)

            derivs = ps.GradientLaplacian(mpi, h, dx)

            # NOTE(review): fft was already constructed above with identical
            # arguments; this re-creation looks redundant
            fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

            for i in range(1, 4):
                vector = [hij[tensor_id(i, j)] for j in range(1, 4)]

                for mu in range(3):
                    fft.idft(vector[mu], vector_x[mu])

                derivs(queue, fx=vector_x[0], pdx=pdx[0])
                derivs(queue, fx=vector_x[1], pdy=pdx[1])
                derivs(queue, fx=vector_x[2], pdz=pdx[2])

                max_err, l2_err = divergence_error(pdx)
                max_rtol = 1.e-10 if dtype == np.float64 else 1.e-4
                l2_rtol = 1.e-14 if dtype == np.float64 else 1.e-6

                assert max_err < max_rtol and l2_err < l2_rtol, \
                    "TT projection is not transverse for grid_shape=%s, h=%d" \
                    % (str(grid_shape), h)

    if timing:
        from common import timer
        ntime = 10
        t = timer(lambda: project.transverse_traceless(queue, hij), ntime=ntime)
        print("TT projection took %.3f ms for grid_shape=%s" % (t, str(grid_shape)))


if __name__ == "__main__":
    args = {'grid_shape': (256,)*3, 'proc_shape': (1,)*3, 'dtype': np.float64}
    from common import get_exec_arg_dict
    args.update(get_exec_arg_dict())
    for h in range(1, 5):
        test_effective_momenta(None, **args, h=h)
    for h in range(0, 5):
        test_vector_projector(None, **args, h=h, pol=False, timing=True)
        test_vector_projector(None, **args, h=h, pol=True, timing=True)
    for h in range(0, 5):
        test_tensor_projector(None, **args, h=h, pol=False, timing=True)
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
import pystella as ps
import pytest
# pylint: disable=no-member

from pyopencl.tools import ( # noqa
    pytest_generate_tests_for_pyopencl as pytest_generate_tests)


@pytest.mark.parametrize("dtype", [np.float64, np.float32])
@pytest.mark.parametrize("random", [True, False])
def test_generate_WKB(ctx_factory, grid_shape, proc_shape, dtype, random,
                      timing=False):
    """Smoke-test RayleighGenerator.generate_WKB (success of the call only)."""
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (10,)*3
    volume = np.product(L)
    dk = tuple(2 * np.pi / Li for Li in L)
    modes = ps.RayleighGenerator(ctx, fft, dk, volume)

    # only checking that this call is successful
    fk, dfk = modes.generate_WKB(queue, random=random)

    if timing:
        ntime = 10
        from common import timer
        t = timer(lambda: modes.generate_WKB(queue, random=random), ntime=ntime)
        print("%srandom, set_modes took %.3f ms for grid_shape=%s"
              % ('' if random else 'non-', t, str(grid_shape)))


@pytest.mark.parametrize("dtype", [np.float64, np.float32])
@pytest.mark.parametrize("random", [True, False])
def test_generate(ctx_factory, grid_shape, proc_shape, dtype, random, timing=False):
    """
    Check that RayleighGenerator.generate produces fields whose binned power
    spectrum matches the requested power law k**exp, and whose position-space
    statistics have small skewness.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    # number of radial bins out to the corner of the k-space grid
    num_bins = int(sum(Ni**2 for Ni in grid_shape)**.5 / 2 + .5) + 1
    L = (10,)*3
    volume = np.product(L)
    dk = tuple(2 * np.pi / Li for Li in L)
    spectra = ps.PowerSpectra(mpi, fft, dk, volume)
    modes = ps.RayleighGenerator(ctx, fft, dk, volume)

    kbins = min(dk) * np.arange(0, num_bins)
    # normalization relating the binned spectrum to the analytic power law
    test_norm = 1 / 2 / np.pi**2 / np.product(grid_shape)**2

    for exp in [-1, -2, -3]:
        def power(k):
            return k**exp

        fk = modes.generate(queue, random=random, norm=1, field_ps=power)

        # compare binned spectrum to the analytic expectation, dropping the
        # zero bin and the outermost (poorly sampled) bin
        spectrum = spectra.norm * spectra.bin_power(fk, queue=queue, k_power=3)[1:-1]
        true_spectrum = test_norm * kbins[1:-1]**3 * power(kbins[1:-1])
        err = np.abs(1 - spectrum / true_spectrum)

        # looser tolerance when bins are well-populated (many modes per bin)
        tol = .1 if num_bins < 64 else .3
        assert np.max(err[num_bins//4:-4]) < tol and np.average(err) < tol, \
            "init power spectrum incorrect for %srandom k**%d" \
            % ('' if random else 'non-', exp)

        fx = fft.idft(cla.to_device(queue, fk))
        if isinstance(fx, np.ndarray):
            fx = cla.to_device(queue, fx)

        # moments of the position-space field across all ranks
        avg = mpi.allreduce(cla.sum(fx).get()) / np.product(grid_shape)
        var = mpi.allreduce(cla.sum(fx**2).get()) / np.product(grid_shape) - avg**2
        skew = mpi.allreduce(cla.sum(fx**3).get()) / np.product(grid_shape) \
            - 3 * avg * var - avg**3
        assert skew / var**1.5 < .1, \
            "init power spectrum has large skewness for %srandom k**%d" \
            % ('' if random else 'non-', exp)

    if timing:
        ntime = 10
        from common import timer
        t = timer(lambda: modes.generate(queue, random=random), ntime=ntime)
        print("%srandom, set_modes took %.3f ms for grid_shape=%s"
              % ('' if random else 'non-', t, str(grid_shape)))


@pytest.mark.parametrize("dtype", [np.float64])
def test_make_hermitian(ctx_factory, grid_shape, proc_shape, dtype):
    """
    Check that pystella.fourier.rayleigh.make_hermitian enforces exact
    f(-k) = conj(f(k)) symmetry on the k_z = 0 and Nyquist planes, and that
    self-conjugate modes are real.
    """
    if proc_shape != (1, 1, 1):
        pytest.skip("test make_hermitian only on one rank")

    kshape = (grid_shape[0], grid_shape[1], grid_shape[2]//2 + 1)
    data = np.random.rand(*kshape) + 1j * np.random.rand(*kshape)

    from pystella.fourier.rayleigh import make_hermitian
    data = make_hermitian(data)

    pos = [np.arange(0, Ni//2+1) for Ni in grid_shape]
    neg = [np.concatenate([np.array([0]), np.arange(Ni-1, Ni//2-1, -1)])
           for Ni in grid_shape]

    for k in [0, grid_shape[-1]//2]:
        for n, p in zip(neg[0], pos[0]):
            assert (data[n, neg[1], k] == np.conj(data[p, pos[1], k])).all(), \
                "Hermitian symmetry failed for data[:, :, %s]" % (k,)
            assert (data[p, neg[1], k] == np.conj(data[n, pos[1], k])).all(), \
                "Hermitian symmetry failed for data[:, :, %s]" % (k,)
        for n, p in zip(neg[1], pos[1]):
            assert (data[neg[0], n, k] == np.conj(data[pos[0], p, k])).all(), \
                "Hermitian symmetry failed for data[:, :, %s]" % (k,)
            assert (data[neg[0], p, k] == np.conj(data[pos[0], n, k])).all(), \
                "Hermitian symmetry failed for data[:, :, %s]" % (k,)

    # check modes are real
    # their k-space indices are also their array indices
    # NOTE(review): k here is the stale value left over from the loop above
    # (the Nyquist plane only); if the k = 0 plane was also intended, this
    # block should loop over both k values — confirm
    for i in [0, grid_shape[0]//2]:
        for j in [0, grid_shape[1]//2]:
            assert np.imag(data[i, j, k]) == 0, \
                "data[%s, %s, %s] is not real" % (i, j, k)


if __name__ == "__main__":
    args = {'grid_shape': (256,)*3, 'proc_shape': (1, 1, 1), 'dtype': np.float64}
    from common import get_exec_arg_dict
    args.update(get_exec_arg_dict())
    test_make_hermitian(None, **args)
    for random in [True, False]:
        test_generate_WKB(None, **args, random=random, timing=True)
        test_generate(None, **args, random=random, timing=True)
import numpy as np
import pyopencl as cl
import pyopencl.clrandom as clr
import pystella as ps
import pytest

from pyopencl.tools import ( # noqa
    pytest_generate_tests_for_pyopencl as pytest_generate_tests)


@pytest.mark.parametrize("dtype", [np.float64, np.float32])
@pytest.mark.parametrize("op", ['sum', 'max'])
@pytest.mark.parametrize("_grid_shape", [None, (128, 64, 32)])
@pytest.mark.parametrize("pass_grid_dims", [True, False])
def test_reduction(ctx_factory, grid_shape, proc_shape, dtype, op,
                   _grid_shape, pass_grid_dims, timing=False):
    """
    Check ps.Reduction against reducer.reduce_array for a random field,
    with and without passing grid dimensions to the constructor.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    # _grid_shape (when not None) overrides the fixture's grid_shape
    grid_shape = _grid_shape or grid_shape
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    from pystella import Field
    reducers = {}
    reducers['avg'] = [(Field('f'), op)]

    if pass_grid_dims:
        reducer = ps.Reduction(mpi, reducers, rank_shape=rank_shape,
                               grid_size=np.product(grid_shape))
    else:
        reducer = ps.Reduction(mpi, reducers)

    f = clr.rand(queue, rank_shape, dtype=dtype)
    result = reducer(queue, f=f)
    avg = result['avg']

    # reference value computed directly on the array; 'sum' reductions are
    # normalized by the total grid size to yield a mean
    avg_test = reducer.reduce_array(f, op)
    if op == 'sum':
        avg_test /= np.product(grid_shape)

    rtol = 5.e-14 if dtype == np.float64 else 1.e-5
    # fix: assertion message spelling ("innaccurate" -> "inaccurate")
    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        "%s reduction inaccurate for grid_shape=%s, proc_shape=%s" \
        % (op, str(grid_shape), str(proc_shape))

    if timing:
        from common import timer
        t = timer(lambda: reducer(queue, f=f), ntime=1000)
        if mpi.rank == 0:
            print("reduction took %.3f ms for grid_shape=%s, proc_shape=%s"
                  % (t, str(grid_shape), str(proc_shape)))
            print("Bandwidth = %.1f GB/s"
                  % (f.nbytes/1024**3 / t * 1000))


@pytest.mark.parametrize("dtype", [np.float64])
@pytest.mark.parametrize("op", ['sum'])
@pytest.mark.parametrize("_grid_shape", [None, (128, 64, 32)])
def test_reduction_with_new_shape(ctx_factory, grid_shape, proc_shape, dtype, op,
                                  _grid_shape, timing=False):
    """
    Check that an existing ps.Reduction instance still produces correct
    results when called with arrays of a new (halved) shape.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    from pystella import Field
    reducers = {}
    reducers['avg'] = [(Field('f'), op)]

    reducer = ps.Reduction(mpi, reducers)

    f = clr.rand(queue, rank_shape, dtype=dtype)
    result = reducer(queue, f=f)
    avg = result['avg']

    avg_test = reducer.reduce_array(f, op)
    if op == 'sum':
        avg_test /= np.product(grid_shape)

    rtol = 5.e-14 if dtype == np.float64 else 1.e-5
    # fix: assertion message spelling ("innaccurate" -> "inaccurate")
    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        "%s reduction inaccurate for grid_shape=%s, proc_shape=%s" \
        % (op, str(grid_shape), str(proc_shape))

    # test call to reducer with new shape
    grid_shape = tuple(Ni // 2 for Ni in grid_shape)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    f = clr.rand(queue, rank_shape, dtype=dtype)
    result = reducer(queue, f=f)
    avg = result['avg']

    avg_test = reducer.reduce_array(f, op)
    if op == 'sum':
        avg_test /= np.product(grid_shape)

    rtol = 5.e-14 if dtype == np.float64 else 1.e-5
    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        "%s reduction w/new shape inaccurate for grid_shape=%s, proc_shape=%s" \
        % (op, str(grid_shape), str(proc_shape))


@pytest.mark.parametrize("dtype", [np.float64])
@pytest.mark.parametrize("_grid_shape", [None, (128, 64, 32)])
@pytest.mark.parametrize("pass_grid_dims", [True, False])
def test_field_statistics(ctx_factory, grid_shape, proc_shape, dtype, _grid_shape,
                          pass_grid_dims, timing=False):
    """
    Check ps.FieldStatistics (mean and variance, halo zones excluded)
    against a direct numpy computation, on a field with outer (batch) axes.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    grid_shape = _grid_shape or grid_shape
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    # override h to exercise a nontrivial halo: f below carries 2*h ghost
    # zones per axis which the statistics must exclude
    h = 2
    f = clr.rand(queue, (2, 1)+tuple(ni + 2*h for ni in rank_shape), dtype=dtype)

    if pass_grid_dims:
        statistics = ps.FieldStatistics(mpi, h, rank_shape=rank_shape,
                                        grid_size=np.product(grid_shape))
    else:
        statistics = ps.FieldStatistics(mpi, h)

    stats = statistics(f)
    avg = stats['mean']
    var = stats['variance']

    # reference: strip halos, reduce over the spatial axes, combine ranks
    f_h = f.get()
    rank_sum = np.sum(f_h[..., h:-h, h:-h, h:-h], axis=(-3, -2, -1))
    avg_test = mpi.allreduce(rank_sum) / np.product(grid_shape)

    rank_sum = np.sum(f_h[..., h:-h, h:-h, h:-h]**2, axis=(-3, -2, -1))
    var_test = mpi.allreduce(rank_sum) / np.product(grid_shape) - avg_test**2

    rtol = 5.e-14 if dtype == np.float64 else 1.e-5

    # fix: assertion message spelling ("innaccurate" -> "inaccurate")
    assert np.allclose(avg, avg_test, rtol=rtol, atol=0), \
        "average inaccurate for grid_shape=%s, proc_shape=%s" \
        % (str(grid_shape), str(proc_shape))

    assert np.allclose(var, var_test, rtol=rtol, atol=0), \
        "variance inaccurate for grid_shape=%s, proc_shape=%s" \
        % (str(grid_shape), str(proc_shape))

    if timing:
        from common import timer
        t = timer(lambda: statistics(f))
        if mpi.rank == 0:
            print("field stats took "
                  "%.3f ms for outer shape %s, grid_shape=%s, proc_shape=%s"
                  % (t, f.shape[:-3], str(grid_shape), str(proc_shape)))


if __name__ == "__main__":
    args = {'grid_shape': (256,)*3, 'proc_shape': (1,)*3,
            'dtype': np.float64, '_grid_shape': None}
    from common import get_exec_arg_dict
    args.update(get_exec_arg_dict())
    for op in ['sum', 'max']:
        test_reduction(None, **args, op=op, pass_grid_dims=True, timing=True)
    test_reduction_with_new_shape(None, **args, op='sum')
    test_field_statistics(None, **args, pass_grid_dims=True, timing=True)
+""" + + +import numpy as np +import pyopencl as cl +import pyopencl.array as cla +import pyopencl.clrandom as clr +import pystella as ps +import pytest + +from pyopencl.tools import ( # noqa + pytest_generate_tests_for_pyopencl as pytest_generate_tests) + +from pystella.multigrid import JacobiIterator, NewtonIterator + + +@pytest.mark.parametrize("h", [1]) +@pytest.mark.parametrize("dtype", [np.float64]) +@pytest.mark.parametrize("Solver", [JacobiIterator, NewtonIterator]) +def test_relax(ctx_factory, grid_shape, proc_shape, h, dtype, Solver, timing=False): + if ctx_factory: + ctx = ctx_factory() + else: + ctx = ps.choose_device_and_make_context() + + queue = cl.CommandQueue(ctx) + rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape)) + mpi = ps.DomainDecomposition(proc_shape, h, rank_shape) + + L = 10 + dx = L / grid_shape[0] + dk = 2 * np.pi / L + + fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype) + spectra = ps.PowerSpectra(mpi, fft, (dk,)*3, L**3) + statistics = ps.FieldStatistics(mpi, h, rank_shape=rank_shape, + grid_size=np.product(grid_shape)) + + def get_laplacian(f): + from pystella.derivs import _lap_coefs, centered_diff + lap_coefs = _lap_coefs[h] + from pymbolic import var + return sum([centered_diff(f, lap_coefs, direction=mu, order=2) + for mu in range(1, 4)]) / var('dx')**2 + + test_problems = {} + + from pystella import Field + f = Field('f', offset='h') + rho = Field('rho', offset='h') + test_problems[f] = (get_laplacian(f), rho) + + f = Field('f2', offset='h') + rho = Field('rho2', offset='h') + test_problems[f] = (get_laplacian(f) - f, rho) + + solver = Solver(mpi, queue, test_problems, h=h, dtype=dtype, + fixed_parameters=dict(omega=1/2)) + + def zero_mean_array(): + f0 = clr.rand(queue, grid_shape, dtype) + f = clr.rand(queue, tuple(ni + 2*h for ni in rank_shape), dtype) + mpi.scatter_array(queue, f0, f, root=0) + avg = statistics(f)['mean'] + f = f - avg + mpi.share_halos(queue, f) + return f + + f = zero_mean_array() + rho = 
zero_mean_array() + tmp = cla.zeros_like(f) + + f2 = zero_mean_array() + rho2 = zero_mean_array() + tmp2 = cla.zeros_like(f) + + num_iterations = 1000 + errors = {'f': [], 'f2': []} + first_mode_zeroed = {'f': [], 'f2': []} + for i in range(0, num_iterations, 2): + solver(mpi, queue, iterations=2, dx=np.array(dx), + f=f, tmp_f=tmp, rho=rho, + f2=f2, tmp_f2=tmp2, rho2=rho2) + + err = solver.get_error(queue, + f=f, r_f=tmp, rho=rho, + f2=f2, r_f2=tmp2, rho2=rho2, dx=np.array(dx)) + for k, v in err.items(): + errors[k].append(v) + + for key, resid in zip(['f', 'f2'], [tmp, tmp2]): + spectrum = spectra(resid, k_power=0) + if mpi.rank == 0: + max_amp = np.max(spectrum) + first_zero = np.argmax(spectrum[1:] < 1.e-30 * max_amp) + first_mode_zeroed[key].append(first_zero) + + for k, errs in errors.items(): + errs = np.array(errs) + iters = np.arange(1, errs.shape[0]+1) + assert (errs[10:, 0] * iters[10:] / errs[0, 0] < 1.).all(), \ + "relaxation not converging at least linearly for " \ + "grid_shape=%s, h=%d, proc_shape=%s" \ + % (str(grid_shape), h, str(proc_shape)) + + first_mode_zeroed = mpi.bcast(first_mode_zeroed, root=0) + for k, x in first_mode_zeroed.items(): + x = np.array(list(x))[2:] + assert (x[1:] <= x[:-1]).all() and np.min(x) < np.max(x) / 5, \ + "relaxation not smoothing error grid_shape=%s, h=%d, proc_shape=%s" \ + % (str(grid_shape), h, str(proc_shape)) + + +if __name__ == "__main__": + args = {'grid_shape': (128,)*3, 'proc_shape': (1,)*3, + 'dtype': np.float64, 'h': 1} + from common import get_exec_arg_dict + args.update(get_exec_arg_dict()) + test_relax(None, **args, Solver=NewtonIterator, timing=True) diff --git a/test/test_spectra.py b/test/test_spectra.py new file mode 100644 index 0000000..c470c7e --- /dev/null +++ b/test/test_spectra.py @@ -0,0 +1,198 @@ +__copyright__ = "Copyright (C) 2019 Zachary J Weiner" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated 
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
import pystella as ps
import pytest

from pyopencl.tools import (  # noqa
    pytest_generate_tests_for_pyopencl as pytest_generate_tests)


def make_data(*shape):
    """Return a random complex array of the given shape."""
    return np.random.rand(*shape) + 1j * np.random.rand(*shape)


def make_hermitian(data, fft):
    """Impose the reality (hermitian) condition on *data* when *fft* is a
    single-rank gDFT; always zero the corner modes.
    """
    from pystella.fourier import gDFT
    if isinstance(fft, gDFT):
        from pystella.fourier.rayleigh import make_hermitian
        data = make_hermitian(data)
    data = fft.zero_corner_modes(data)
    return data


@pytest.mark.filterwarnings(
    "ignore::pyopencl.characterize.CLCharacterizationWarning")
@pytest.mark.filterwarnings("ignore::loopy.diagnostic.LoopyAdvisory")
@pytest.mark.parametrize("dtype", [np.float64, np.float32])
@pytest.mark.parametrize("L", [(10,)*3, (10, 7, 8), (3, 8, 19), (13.2, 5.71, 9.4),
                               (11, 11, 4), (4, 11, 11), (11, 4, 11)])
def test_spectra(ctx_factory, grid_shape, proc_shape, dtype, L, timing=False):
    """Compare PowerSpectra's binned power of a real-field transform against
    a direct np.histogram computation, for a variety of box dimensions.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = L or (3, 5, 7)
    dk = tuple(2 * np.pi / Li for Li in L)
    cdtype = fft.cdtype
    # np.prod replaces np.product (removed in numpy 2.0)
    spec = ps.PowerSpectra(mpi, fft, dk, np.prod(L), bin_width=min(dk)+.001)
    # FIXME: bin_width=min(dk) sometimes disagrees to O(.1%) with numpy...

    assert int(np.sum(spec.bin_counts)) == np.prod(grid_shape), \
        "bin counts don't sum to total number of points/modes"

    k_power = 2.
    fk = make_data(*fft.shape(True)).astype(cdtype)

    fk_d = cla.to_device(queue, fk)
    spectrum = spec.bin_power(fk_d, k_power=k_power, is_real=True)
    bins = np.arange(-.5, spec.num_bins + .5) * spec.bin_width

    sub_k = list(x.get() for x in fft.sub_k.values())
    kvecs = np.meshgrid(*sub_k, indexing='ij', sparse=False)
    kmags = np.sqrt(sum((dki * ki)**2 for dki, ki in zip(dk, kvecs)))

    # real transforms store only half the modes; double-count all but the
    # kz = 0 and kz = Nyquist planes, which are their own conjugates
    counts = 2. * np.ones_like(kmags)
    counts[kvecs[2] == 0] = 1
    counts[kvecs[2] == grid_shape[-1]//2] = 1

    max_rtol = 1.e-8 if dtype == np.float64 else 2.e-2
    avg_rtol = 1.e-11 if dtype == np.float64 else 2.e-4

    bin_counts2 = spec.bin_power(np.ones_like(fk), queue=queue, k_power=0)
    assert np.max(np.abs(bin_counts2 - 1)) < max_rtol, \
        "bin counting disagrees between PowerSpectra and np.histogram"

    hist = np.histogram(kmags, bins=bins,
                        weights=np.abs(fk)**2 * counts * kmags**k_power)[0]
    hist = mpi.allreduce(hist) / spec.bin_counts

    # skip the Nyquist mode and the zero mode
    err = np.abs((spectrum[1:-2] - hist[1:-2]) / hist[1:-2])
    assert np.max(err) < max_rtol and np.average(err) < avg_rtol, \
        "real power spectrum inaccurate for grid_shape=%s" % str(grid_shape)

    if timing:
        from common import timer
        t = timer(lambda: spec.bin_power(fk_d, k_power=k_power, is_real=True))
        print("real power spectrum took %.3f ms for grid_shape=%s"
              % (t, str(grid_shape)))

    # complex_shape = (p.grid_shape[0], p.grid_shape[1]//p.proc_shape[0],
    #                  p.grid_shape[2]//p.proc_shape[1])
    # fk = make_data(complex_shape).astype(cdtype)

    # fk_d = cla.to_device(queue, fk)
    # spectrum = spec.bin_power(fk_d, k_power=k_power, is_real=False)

    # hist = np.histogram(ckmags/dk, bins=bins,
    #                     weights=np.abs(fk)**2. * ckmags**k_power)[0]
    # hist = mpi.allreduce(hist)/spec.bin_counts

    # err = np.abs((spectrum[1:-2] - hist[1:-2]) / hist[1:-2])
    # assert np.max(err) < max_rtol and np.average(err) < avg_rtol, \
    #     "complex power spectrum inaccurate for N=%d" % (N)

    # if timing:
    #     start = time.time()
    #     for i in range(nrun):
    #         spectrum = spec.bin_power(fk_d, k_power=k_power, is_real=False)
    #     end = time.time()
    #     print("complex power spectrum took %.3f ms for N=%d"
    #           % ((end - start)/nrun*1000., N))


@pytest.mark.filterwarnings(
    "ignore::pyopencl.characterize.CLCharacterizationWarning")
@pytest.mark.filterwarnings("ignore::loopy.diagnostic.LoopyAdvisory")
@pytest.mark.parametrize("dtype", [np.float64, np.float32])
def test_pol_spectra(ctx_factory, grid_shape, proc_shape, dtype, timing=False):
    """Check that polarization spectra are unchanged by a round trip through
    Projector.pol_to_vec and vec_to_pol.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    h = 1
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)

    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (10, 8, 7)
    dk = tuple(2 * np.pi / Li for Li in L)
    cdtype = fft.cdtype
    # np.prod replaces np.product (removed in numpy 2.0)
    spec = ps.PowerSpectra(mpi, fft, dk, np.prod(L))

    k_power = 2.

    fk = make_data(*fft.shape(True)).astype(cdtype)
    fk = make_hermitian(fk, fft).astype(cdtype)
    plus = cla.to_device(queue, fk)

    fk = make_data(*fft.shape(True)).astype(cdtype)
    fk = make_hermitian(fk, fft).astype(cdtype)
    minus = cla.to_device(queue, fk)

    plus_ps_1 = spec.bin_power(plus, queue=queue, k_power=k_power)
    minus_ps_1 = spec.bin_power(minus, queue=queue, k_power=k_power)

    project = ps.Projector(fft, h)

    # round-trip: polarizations -> vector -> polarizations
    vector = cla.empty(queue, (3,)+fft.shape(True), cdtype)
    project.pol_to_vec(queue, plus, minus, vector)
    project.vec_to_pol(queue, plus, minus, vector)

    plus_ps_2 = spec.bin_power(plus, k_power=k_power)
    minus_ps_2 = spec.bin_power(minus, k_power=k_power)

    max_rtol = 1.e-8 if dtype == np.float64 else 1.e-2
    avg_rtol = 1.e-11 if dtype == np.float64 else 1.e-4

    # skip the Nyquist mode and the zero mode
    err = np.abs((plus_ps_1[1:-2] - plus_ps_2[1:-2]) / plus_ps_1[1:-2])
    assert np.max(err) < max_rtol and np.average(err) < avg_rtol, \
        "plus power spectrum inaccurate for grid_shape=%s" % str(grid_shape)

    err = np.abs((minus_ps_1[1:-2] - minus_ps_2[1:-2]) / minus_ps_1[1:-2])
    assert np.max(err) < max_rtol and np.average(err) < avg_rtol, \
        "minus power spectrum inaccurate for grid_shape=%s" % str(grid_shape)


if __name__ == "__main__":
    args = {'grid_shape': (256,)*3, 'proc_shape': (1,)*3, 'dtype': np.float64}
    from common import get_exec_arg_dict
    args.update(get_exec_arg_dict())
    test_spectra(None, **args, L=None, timing=True)
    test_pol_spectra(None, **args, timing=True)
import numpy as np
import pyopencl as cl
import pyopencl.clrandom as clr
import pystella as ps
import pytest

from pyopencl.tools import (  # noqa
    pytest_generate_tests_for_pyopencl as pytest_generate_tests)


@pytest.mark.filterwarnings(
    "ignore::pyopencl.characterize.CLCharacterizationWarning")
@pytest.mark.filterwarnings("ignore::loopy.diagnostic.LoopyAdvisory")
@pytest.mark.parametrize("dtype", [np.float64, np.float32])
@pytest.mark.parametrize("stream", [True, False])
def test_stencil(ctx_factory, grid_shape, proc_shape, dtype, stream, h=1,
                 timing=False):
    """Check a six-point neighbor-sum stencil against a numpy slicing
    reference, for both the plain and streaming stencil kernels.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))

    from pymbolic import var
    x = var('x')
    y = var('y')
    i, j, k = var('i'), var('j'), var('k')

    # y[i, j, k] = sum of x's six nearest neighbors at distance h
    # (indices into x are shifted by +h since x carries halo padding)
    map_dict = {}
    map_dict[y[i, j, k]] = x[i + h + h, j + h, k + h] \
        + x[i + h, j + h + h, k + h] \
        + x[i + h, j + h, k + h + h] \
        + x[i - h + h, j + h, k + h] \
        + x[i + h, j - h + h, k + h] \
        + x[i + h, j + h, k - h + h]

    if stream:
        try:
            stencil_map = ps.StreamingStencil(map_dict, prefetch_args=['x'], h=h)
        except Exception:
            # narrowed from a bare except: don't swallow KeyboardInterrupt
            pytest.skip("StreamingStencil unavailable")
    else:
        stencil_map = ps.Stencil(map_dict, prefetch_args=['x'], h=h)

    x = clr.rand(queue, tuple(ni + 2*h for ni in rank_shape), dtype)
    y = clr.rand(queue, rank_shape, dtype)

    # reference result via numpy slicing of the padded host array
    x_h = x.get()
    y_true = (x_h[2*h:, h:-h, h:-h]
              + x_h[h:-h, 2*h:, h:-h]
              + x_h[h:-h, h:-h, 2*h:]
              + x_h[:-2*h, h:-h, h:-h]
              + x_h[h:-h, :-2*h, h:-h]
              + x_h[h:-h, h:-h, :-2*h])

    stencil_map(queue, x=x, y=y)

    rtol = 5.e-14 if dtype == np.float64 else 1.e-5

    assert np.allclose(y.get(), y_true, rtol=rtol, atol=0), \
        "average inaccurate for grid_shape=%s, h=%d, proc_shape=%s" \
        % (str(grid_shape), h, str(proc_shape))

    if timing:
        from common import timer
        t = timer(lambda: stencil_map(queue, x=x, y=y)[0])
        print("stencil took %.3f ms for grid_shape=%s, h=%d, proc_shape=%s"
              % (t, str(grid_shape), h, str(proc_shape)))
        print("Bandwidth = %.1f GB/s"
              % ((x.nbytes + y.nbytes)/1024**3 / t * 1000))


if __name__ == "__main__":
    args = {'grid_shape': (256,)*3, 'proc_shape': (1,)*3, 'dtype': np.float64}
    from common import get_exec_arg_dict
    args.update(get_exec_arg_dict())
    for h in range(1, 4):
        for stream in [True, False]:
            test_stencil(None, **args, stream=stream, h=h, timing=True)
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
import pyopencl.clrandom as clr
import pyopencl.clmath as clm
import pystella as ps
import pytest

from pyopencl.tools import (  # noqa
    pytest_generate_tests_for_pyopencl as pytest_generate_tests)

from pystella.step import all_steppers


# this only tests Stepper's correctness as an ODE solver
@pytest.mark.parametrize("dtype", [np.float64])
@pytest.mark.parametrize("Stepper", all_steppers)
def test_step(ctx_factory, proc_shape, dtype, Stepper):
    """Integrate y' = y**n (whose solution is known in closed form) at a
    sequence of timesteps and check both accuracy and the stepper's
    expected order of convergence.
    """
    if proc_shape != (1, 1, 1):
        pytest.skip("test step only on one rank")

    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)

    from pystella.step import LowStorageRKStepper
    is_low_storage = LowStorageRKStepper in Stepper.__bases__

    rank_shape = (64,)*3
    if is_low_storage:
        arr_shape = rank_shape
    else:
        # classical steppers carry a leading axis of storage copies
        arr_shape = (3,) + rank_shape

    dtlist = [.1, .05, .025, .0125]

    for n in [-1., -2., -3., -4.]:
        max_errs = {}
        for dt in dtlist:

            def sol(y0, t):
                # exact solution of y' = y**n with y(0) = y0
                return ((-1 + n)*(-t + y0**(1 - n)/(-1 + n)))**(1/(1 - n))

            y = ps.Field('y')
            rhs_dict = {y: y**n}

            import loopy as lp
            args = [lp.GlobalArg('y', shape=arr_shape, dtype=dtype)]
            y = clr.rand(queue, arr_shape, dtype=dtype) + 1.

            if is_low_storage:
                k_tmp = cla.zeros(queue, (1,)+arr_shape, dtype=dtype)
                y0 = y.copy()
                stepper = Stepper(rhs_dict, k_tmp=k_tmp, args=args, dt=dt, h=1,
                                  rank_shape=rank_shape)
            else:
                stepper = Stepper(rhs_dict, args=args, dt=dt, h=1,
                                  rank_shape=rank_shape)
                y0 = y[0].copy()

            t = 0
            errs = []
            while t < 1.:
                for s in range(stepper.num_stages):
                    stepper(s, queue=queue, y=y)
                t += dt

                # compare against the exact solution; the solved copy lives
                # at index 0 for non-low-storage steppers
                if is_low_storage:
                    errs.append(cla.max(clm.fabs(1. - sol(y0, t)/y)).get())
                else:
                    errs.append(cla.max(clm.fabs(1. - sol(y0, t)/y[0])).get())

            max_errs[dt] = max(errs)

        # expected_order computed once (was duplicated); debug print folded
        # into one informative line
        order = stepper.expected_order
        print("order =", order, "n =", n, max_errs)
        for a, b in zip(dtlist[:-1], dtlist[1:]):
            print(max_errs[a] / max_errs[b])

        rtol = dtlist[-1]**order if dtype == np.float64 else 1.e-1
        assert list(max_errs.values())[-1] < rtol, \
            "Stepper solution inaccurate for n=%f" % (n)

        # halving dt should shrink the error by roughly 2**order
        for a, b in zip(dtlist[:-1], dtlist[1:]):
            assert max_errs[a] / max_errs[b] > .9 * 2.**order, \
                "Stepper convergence failing for n=%f" % (n)


if __name__ == "__main__":
    for stepper in all_steppers:
        test_step(None, (1, 1, 1), np.float64, stepper)
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
import pystella as ps
import pytest

from pyopencl.tools import (  # noqa
    pytest_generate_tests_for_pyopencl as pytest_generate_tests)


@pytest.mark.filterwarnings(
    "ignore::pyopencl.characterize.CLCharacterizationWarning")
@pytest.mark.filterwarnings("ignore::loopy.diagnostic.LoopyAdvisory")
@pytest.mark.filterwarnings("ignore::loopy.diagnostic.ParameterFinderWarning")
@pytest.mark.parametrize("h", [2])
@pytest.mark.parametrize("dtype", [np.float64])
def test_transfer(ctx_factory, grid_shape, proc_shape, h, dtype, timing=False):
    """Check multigrid restriction and interpolation operators against a
    smooth sine-wave test function, verifying their expected accuracy.
    """
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    rank_shape = tuple(Ni // pi for Ni, pi in zip(grid_shape, proc_shape))
    mpi = ps.DomainDecomposition(proc_shape, h, rank_shape)
    # coarse grid at half the resolution in each dimension
    grid_shape_2 = tuple(Ni // 2 for Ni in grid_shape)
    rank_shape_2 = tuple(ni // 2 for ni in rank_shape)
    mpi2 = ps.DomainDecomposition(proc_shape, h, rank_shape_2)

    from pystella.multigrid import (Injection, FullWeighting,
                                    LinearInterpolation, CubicInterpolation)

    inject = Injection(h=h, dtype=dtype)
    full_weighting = FullWeighting(h=h, dtype=dtype)

    def relerr(a, b):
        return np.max(np.abs(a-b))

    for restrict in [inject, full_weighting]:
        f1h = cla.zeros(queue, tuple(ni + 2*h for ni in rank_shape), dtype)
        f2h = cla.zeros(queue, tuple(ni + 2*h for ni in rank_shape_2), dtype)

        kvec = 2 * np.pi * np.array([1, 1, 1]).astype(dtype)

        xvecs = np.meshgrid(np.linspace(0, 1, grid_shape[0]+1)[:-1],
                            np.linspace(0, 1, grid_shape[1]+1)[:-1],
                            np.linspace(0, 1, grid_shape[2]+1)[:-1], indexing='ij')

        phases = kvec[0] * xvecs[0] + kvec[1] * xvecs[1] + kvec[2] * xvecs[2]
        mpi.scatter_array(queue, np.sin(phases), f1h, root=0)
        mpi.share_halos(queue, f1h)

        restrict(queue, f1=f1h, f2=f2h)

        # compare fine-grid points shared with the coarse grid
        restrict_error = relerr(f1h.get()[h:-h:2, h:-h:2, h:-h:2],
                                f2h.get()[h:-h, h:-h, h:-h])

        # identity check with `is`: these are specific operator instances
        if restrict is inject:
            # injection copies coincident points, so it should be exact
            expected_error_bound = 1.e-15
        else:
            # full weighting is second-order accurate
            expected_error_bound = .05 / (grid_shape[0]/32)**2

        assert restrict_error < expected_error_bound, \
            "%s inaccurate for grid_shape=%s, h=%d, proc_shape=%s" \
            % ('restrict', str(grid_shape), h, str(proc_shape))

    linear_interp = LinearInterpolation(h=h, dtype=dtype)
    cubic_interp = CubicInterpolation(h=h, dtype=dtype)

    for interp in [linear_interp, cubic_interp]:
        kvec = 2 * np.pi * np.array([1, 1, 1]).astype(dtype)

        xvecs = np.meshgrid(np.linspace(0, 1, grid_shape_2[0]+1)[:-1],
                            np.linspace(0, 1, grid_shape_2[1]+1)[:-1],
                            np.linspace(0, 1, grid_shape_2[2]+1)[:-1],
                            indexing='ij')

        phases = kvec[0] * xvecs[0] + kvec[1] * xvecs[1] + kvec[2] * xvecs[2]
        mpi2.scatter_array(queue, np.sin(phases), f2h, root=0)
        mpi2.share_halos(queue, f2h)

        f1h_new = cla.zeros_like(f1h)
        interp(queue, f1=f1h_new, f2=f2h)
        mpi.share_halos(queue, f1h_new)

        interp_error = relerr(f1h_new.get(), f1h.get())

        if interp is cubic_interp:
            # cubic interpolation is fourth-order accurate
            expected_error_bound = .005 / (grid_shape[0]/32)**4
        else:
            # linear interpolation is second-order accurate
            expected_error_bound = .1 / (grid_shape[0]/32)**2

        assert interp_error < expected_error_bound, \
            "%s inaccurate for grid_shape=%s, h=%d, proc_shape=%s" \
            % ('interp', str(grid_shape), h, str(proc_shape))


if __name__ == "__main__":
    args = {'grid_shape': (128,)*3, 'proc_shape': (1,)*3,
            'dtype': np.float64, 'h': 2}
    from common import get_exec_arg_dict
    args.update(get_exec_arg_dict())
    test_transfer(None, **args, timing=True)