diff --git a/.github/workflows/xskillscore_testing.yml b/.github/workflows/xskillscore_testing.yml index d675843b..07a37860 100644 --- a/.github/workflows/xskillscore_testing.yml +++ b/.github/workflows/xskillscore_testing.yml @@ -51,7 +51,7 @@ jobs: run: | pytest -n 4 --cov=xskillscore --cov-report=xml --verbose - name: Upload coverage to codecov - uses: codecov/codecov-action@v1.4.1 + uses: codecov/codecov-action@v1.5.2 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 13c71757..58eec925 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,7 @@ Internal Changes Documentation ~~~~~~~~~~~~~ - Added more info in ``quick-start.ipynb`` (:pr:`316`) `Ray Bell`_. +- Created ``tabular-data.ipynb`` (:pr:`330`) `Ray Bell`_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/ci/docs_notebooks.yml b/ci/docs_notebooks.yml index 6625a3b8..9a5ec040 100644 --- a/ci/docs_notebooks.yml +++ b/ci/docs_notebooks.yml @@ -16,13 +16,15 @@ dependencies: # see https://github.com/xgcm/xhistogram/issues/48 - xhistogram==0.1.2 - importlib_metadata + - ipykernel - jupyterlab - matplotlib-base - nbsphinx - nbstripout - sphinx - - sphinxcontrib-napoleon + - sphinx-autosummary-accessors - sphinx_rtd_theme + - sphinxcontrib-napoleon - black - doc8 - isort @@ -30,6 +32,5 @@ dependencies: - pre-commit - pip - pip: - - sphinx_autosummary_accessors # Install latest version of xskillscore. - -e .. 
diff --git a/docs/source/geophysical-data.ipynb b/docs/source/geophysical-data.ipynb new file mode 100644 index 00000000..09d93658 --- /dev/null +++ b/docs/source/geophysical-data.ipynb @@ -0,0 +1,39 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 3 + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Geophysical Data\n", + "\n", + "See the [Quick Start](https://xskillscore.readthedocs.io/en/stable/quick-start.html) section.\n", + "\n", + "[climpred](https://climpred.readthedocs.io/en/stable/), which uses `xskillscore`, has examples of evaluating climate predictions." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ] +} diff --git a/docs/source/index.rst b/docs/source/index.rst index 03244e99..7c4cb6b0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,6 +51,8 @@ You can also install the bleeding edge (pre-release versions) by running: :caption: Getting Started quick-start.ipynb + geophysical-data.ipynb + tabular-data.ipynb **Help & Reference** diff --git a/docs/source/tabular-data.ipynb b/docs/source/tabular-data.ipynb new file mode 100644 index 00000000..e4babdca --- /dev/null +++ b/docs/source/tabular-data.ipynb @@ -0,0 +1,507 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.4" + }, + "orig_nbformat": 4, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.9.4 64-bit ('xskillscore-docs-notebooks': conda)" + }, + "interpreter": { + 
"hash": "e5607b67897ceeb4cb8d1a6f5e8f77cf995244d75ab9ff3b133e23bb37c07f75" + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Tabular Data\n", + "\n", + "`xskillscore` can be used on tabular data such as that stored in a `pandas.DataFrame`.\n", + "\n", + "It can be used most effectively when evaluating predictions over different fields." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import xskillscore as xs\n", + "from sklearn.datasets import load_boston\n", + "from sklearn.metrics import mean_squared_error\n", + "np.random.seed(seed=42)" + ] + }, + { + "source": [ + "## Boston house prices dataset" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "A small example is to take a dataset and evaluate the model according to a field (column).\n", + "\n", + "Load the Boston house prices dataset:" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \\\n", + "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 \n", + "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 \n", + "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 \n", + "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 \n", + "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 \n", + ".. ... ... ... ... ... ... ... ... ... ... 
\n", + "501 0.06263 0.0 11.93 0.0 0.573 6.593 69.1 2.4786 1.0 273.0 \n", + "502 0.04527 0.0 11.93 0.0 0.573 6.120 76.7 2.2875 1.0 273.0 \n", + "503 0.06076 0.0 11.93 0.0 0.573 6.976 91.0 2.1675 1.0 273.0 \n", + "504 0.10959 0.0 11.93 0.0 0.573 6.794 89.3 2.3889 1.0 273.0 \n", + "505 0.04741 0.0 11.93 0.0 0.573 6.030 80.8 2.5050 1.0 273.0 \n", + "\n", + " PTRATIO B LSTAT y \n", + "0 15.3 396.90 4.98 24.0 \n", + "1 17.8 396.90 9.14 21.6 \n", + "2 17.8 392.83 4.03 34.7 \n", + "3 18.7 394.63 2.94 33.4 \n", + "4 18.7 396.90 5.33 36.2 \n", + ".. ... ... ... ... \n", + "501 21.0 391.99 9.67 22.4 \n", + "502 21.0 396.90 9.08 20.6 \n", + "503 21.0 396.90 5.64 23.9 \n", + "504 21.0 393.45 6.48 22.0 \n", + "505 21.0 396.90 7.88 11.9 \n", + "\n", + "[506 rows x 14 columns]" + ], + "text/html": "
\n | CRIM | \nZN | \nINDUS | \nCHAS | \nNOX | \nRM | \nAGE | \nDIS | \nRAD | \nTAX | \nPTRATIO | \nB | \nLSTAT | \ny | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n0.00632 | \n18.0 | \n2.31 | \n0.0 | \n0.538 | \n6.575 | \n65.2 | \n4.0900 | \n1.0 | \n296.0 | \n15.3 | \n396.90 | \n4.98 | \n24.0 | \n
1 | \n0.02731 | \n0.0 | \n7.07 | \n0.0 | \n0.469 | \n6.421 | \n78.9 | \n4.9671 | \n2.0 | \n242.0 | \n17.8 | \n396.90 | \n9.14 | \n21.6 | \n
2 | \n0.02729 | \n0.0 | \n7.07 | \n0.0 | \n0.469 | \n7.185 | \n61.1 | \n4.9671 | \n2.0 | \n242.0 | \n17.8 | \n392.83 | \n4.03 | \n34.7 | \n
3 | \n0.03237 | \n0.0 | \n2.18 | \n0.0 | \n0.458 | \n6.998 | \n45.8 | \n6.0622 | \n3.0 | \n222.0 | \n18.7 | \n394.63 | \n2.94 | \n33.4 | \n
4 | \n0.06905 | \n0.0 | \n2.18 | \n0.0 | \n0.458 | \n7.147 | \n54.2 | \n6.0622 | \n3.0 | \n222.0 | \n18.7 | \n396.90 | \n5.33 | \n36.2 | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
501 | \n0.06263 | \n0.0 | \n11.93 | \n0.0 | \n0.573 | \n6.593 | \n69.1 | \n2.4786 | \n1.0 | \n273.0 | \n21.0 | \n391.99 | \n9.67 | \n22.4 | \n
502 | \n0.04527 | \n0.0 | \n11.93 | \n0.0 | \n0.573 | \n6.120 | \n76.7 | \n2.2875 | \n1.0 | \n273.0 | \n21.0 | \n396.90 | \n9.08 | \n20.6 | \n
503 | \n0.06076 | \n0.0 | \n11.93 | \n0.0 | \n0.573 | \n6.976 | \n91.0 | \n2.1675 | \n1.0 | \n273.0 | \n21.0 | \n396.90 | \n5.64 | \n23.9 | \n
504 | \n0.10959 | \n0.0 | \n11.93 | \n0.0 | \n0.573 | \n6.794 | \n89.3 | \n2.3889 | \n1.0 | \n273.0 | \n21.0 | \n393.45 | \n6.48 | \n22.0 | \n
505 | \n0.04741 | \n0.0 | \n11.93 | \n0.0 | \n0.573 | \n6.030 | \n80.8 | \n2.5050 | \n1.0 | \n273.0 | \n21.0 | \n396.90 | \n7.88 | \n11.9 | \n
506 rows × 14 columns
\n\n | \n | y | \nyhat | \n
---|---|---|---|
index | \nRAD | \n\n | \n |
0 | \n1.0 | \n24.0 | \n17.977926 | \n
1 | \n2.0 | \n21.6 | \n41.070858 | \n
2 | \n2.0 | \n34.7 | \n50.800380 | \n
3 | \n3.0 | \n33.4 | \n39.990387 | \n
4 | \n3.0 | \n36.2 | \n11.295750 | \n
... | \n... | \n... | \n... | \n
501 | \n1.0 | \n22.4 | \n24.017117 | \n
502 | \n1.0 | \n20.6 | \n12.752538 | \n
503 | \n1.0 | \n23.9 | \n38.899402 | \n
504 | \n1.0 | \n22.0 | \n30.128172 | \n
505 | \n1.0 | \n11.9 | \n5.000000 | \n
506 rows × 2 columns
\n<xarray.Dataset>\nDimensions: (RAD: 9, index: 506)\nCoordinates:\n * index (index) int64 0 1 2 3 4 5 6 7 8 ... 498 499 500 501 502 503 504 505\n * RAD (RAD) float64 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 24.0\nData variables:\n y (index, RAD) float64 24.0 nan nan nan nan ... nan nan nan nan nan\n yhat (index, RAD) float64 17.98 nan nan nan nan ... nan nan nan nan nan
array([ 0, 1, 2, ..., 503, 504, 505])
array([ 1., 2., 3., 4., 5., 6., 7., 8., 24.])
array([[24. , nan, nan, ..., nan, nan, nan],\n [ nan, 21.6, nan, ..., nan, nan, nan],\n [ nan, 34.7, nan, ..., nan, nan, nan],\n ...,\n [23.9, nan, nan, ..., nan, nan, nan],\n [22. , nan, nan, ..., nan, nan, nan],\n [11.9, nan, nan, ..., nan, nan, nan]])
array([[17.9779257 , nan, nan, ..., nan,\n nan, nan],\n [ nan, 41.07085804, nan, ..., nan,\n nan, nan],\n [ nan, 50.80037956, nan, ..., nan,\n nan, nan],\n ...,\n [38.89940194, nan, nan, ..., nan,\n nan, nan],\n [30.12817159, nan, nan, ..., nan,\n nan, nan],\n [ 5. , nan, nan, ..., nan,\n nan, nan]])
<xarray.DataArray (RAD: 9)>\narray([161.23755363, 313.85575025, 307.22076012, 162.63442999,\n 221.85296903, 155.6129776 , 214.37524005, 278.09256049,\n 148.84050691])\nCoordinates:\n * RAD (RAD) float64 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 24.0
array([161.23755363, 313.85575025, 307.22076012, 162.63442999,\n 221.85296903, 155.6129776 , 214.37524005, 278.09256049,\n 148.84050691])
array([ 1., 2., 3., 4., 5., 6., 7., 8., 24.])
\n | DATE | \nSTORE | \nSKU | \ny | \nyhat | \n
---|---|---|---|---|---|
0 | \n2020-01-01 | \n0 | \n0 | \n3 | \n4.617306 | \n
1 | \n2020-01-01 | \n0 | \n1 | \n6 | \n1.000000 | \n
2 | \n2020-01-01 | \n0 | \n2 | \n2 | \n3.039347 | \n
3 | \n2020-01-01 | \n0 | \n3 | \n3 | \n5.102145 | \n
4 | \n2020-01-01 | \n0 | \n4 | \n5 | \n3.563087 | \n
... | \n... | \n... | \n... | \n... | \n... | \n
99995 | \n2020-01-10 | \n99 | \n95 | \n9 | \n15.836256 | \n
99996 | \n2020-01-10 | \n99 | \n96 | \n5 | \n7.515791 | \n
99997 | \n2020-01-10 | \n99 | \n97 | \n1 | \n1.000000 | \n
99998 | \n2020-01-10 | \n99 | \n98 | \n6 | \n6.676512 | \n
99999 | \n2020-01-10 | \n99 | \n99 | \n5 | \n4.600985 | \n
100000 rows × 5 columns
\n