pydata · dcherian · Jul 7, 2025 · May 9, 2025 · May 9, 2025 · May 9, 2025
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     env:
       ASV_DIR: "./asv_bench"
-      CONDA_ENV_FILE: ci/requirements/environment.yml
+      CONDA_ENV_FILE: ci/requirements/environment-benchmark.yml
 
     steps:
       # We need the full repo to avoid this issue
@@ -29,7 +29,7 @@ jobs:
         with:
           micromamba-version: "1.5.10-0"
           environment-file: ${{env.CONDA_ENV_FILE}}
-          environment-name: xarray-tests
+          environment-name: xarray-benchmark
           cache-environment: true
           cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}-benchmark"
           # add "build" because of https://github.com/airspeed-velocity/asv/issues/1385

diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
@@ -60,7 +60,7 @@
   // },
   "matrix": {
     "setuptools_scm": [""], // GH6609
-    "numpy": [""],
+    "numpy": ["2.2"],
     "pandas": [""],
     "netcdf4": [""],
     "scipy": [""],

diff --git a/asv_bench/benchmarks/repr.py b/asv_bench/benchmarks/repr.py
@@ -57,3 +57,31 @@ def time_repr(self):
 
     def time_repr_html(self):
         self.da._repr_html_()
+
+
+class ReprPandasRangeIndex:
+    # displaying a memory-saving pandas.RangeIndex shouldn't trigger a
+    # memory-expensive conversion into a numpy array
+    def setup(self):
+        index = xr.indexes.PandasIndex(pd.RangeIndex(1_000_000), "x")
+        self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index))
+
+    def time_repr(self):
+        repr(self.ds.x)
+
+    def time_repr_html(self):
+        self.ds.x._repr_html_()
+
+
+class ReprXarrayRangeIndex:
+    # displaying an Xarray RangeIndex shouldn't trigger a memory-expensive
+    # conversion of its lazy coordinate into a numpy array
+    def setup(self):
+        index = xr.indexes.RangeIndex.arange(1_000_000, dim="x")
+        self.ds = xr.Dataset(coords=xr.Coordinates.from_xindex(index))
+
+    def time_repr(self):
+        repr(self.ds.x)
+
+    def time_repr_html(self):
+        self.ds.x._repr_html_()
diff --git a/ci/requirements/environment-benchmark.yml b/ci/requirements/environment-benchmark.yml
@@ -0,0 +1,23 @@
+name: xarray-benchmark
+channels:
+  - conda-forge
+  - nodefaults
+dependencies:
+  - bottleneck
+  - cftime
+  - dask-core
+  - distributed
+  - flox
+  - netcdf4
+  - numba
+  - numbagg
+  - numexpr
+  - numpy>=2.2,<2.3 # https://github.com/numba/numba/issues/10105
+  - opt_einsum
+  - packaging
+  - pandas
+  - pyarrow # pandas raises a deprecation warning without this, breaking doctests
+  - sparse
+  - scipy
+  - toolz
+  - zarr
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -33,6 +33,13 @@ Documentation
 Internal Changes
 ~~~~~~~~~~~~~~~~
 
+- Refactored the ``PandasIndexingAdapter`` and
+  ``CoordinateTransformIndexingAdapter`` internal indexing classes. Coordinate
+  variables that wrap a :py:class:`pandas.RangeIndex`, a
+  :py:class:`pandas.MultiIndex` or a
+  :py:class:`xarray.indexes.CoordinateTransform` are now displayed as lazy variables
+  in the Xarray data reprs (:pull:`10355`).
+  By `Benoit Bovy <https://github.com/benbovy>`_.
 
 .. _whats-new.2025.07.0:
 

diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
@@ -20,7 +20,11 @@
 from xarray.core.datatree_render import RenderDataTree
 from xarray.core.duck_array_ops import array_all, array_any, array_equiv, astype, ravel
 from xarray.core.extension_array import PandasExtensionArray
-from xarray.core.indexing import MemoryCachedArray
+from xarray.core.indexing import (
+    BasicIndexer,
+    ExplicitlyIndexed,
+    MemoryCachedArray,
+)
 from xarray.core.options import OPTIONS, _get_boolean_with_default
 from xarray.core.treenode import group_subtrees
 from xarray.core.utils import is_duck_array
@@ -87,6 +91,8 @@ def first_n_items(array, n_desired):
 
     if n_desired < array.size:
         indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=False)
+        if isinstance(array, ExplicitlyIndexed):
+            indexer = BasicIndexer(indexer)
         array = array[indexer]
 
     # We pass variable objects in to handle indexing
@@ -111,6 +117,8 @@ def last_n_items(array, n_desired):
 
     if n_desired < array.size:
         indexer = _get_indexer_at_least_n_items(array.shape, n_desired, from_end=True)
+        if isinstance(array, ExplicitlyIndexed):
+            indexer = BasicIndexer(indexer)
         array = array[indexer]
 
     # We pass variable objects in to handle indexing
@@ -659,6 +667,7 @@ def short_array_repr(array):
 def short_data_repr(array):
     """Format "data" for DataArray and Variable."""
     internal_data = getattr(array, "variable", array)._data
+
     if isinstance(array, np.ndarray):
         return short_array_repr(array)
     elif is_duck_array(internal_data):