Skip to content

Commit d8fc346

Browse files
authored
Speed-up multi-index html repr + add display_values_threshold option (#6400)
* add _repr_html_ for PandasMultiIndexingAdapter. This may greatly speed up the html repr of Xarray objects with multi-indexes. This optimized _repr_html_ is now used for formatting the array detailed view of multi-index coordinates, instead of converting the full index / levels to numpy arrays before formatting it.
* update release notes
* nit
* add display_values_threshold
* fix last merge main
1 parent f64f14b commit d8fc346

File tree

5 files changed

+45
-13
lines changed

5 files changed

+45
-13
lines changed

doc/whats-new.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ New Features
3232
- Multi-index levels are now accessible through their own, regular coordinates
3333
instead of virtual coordinates (:pull:`5692`).
3434
By `Benoît Bovy <https://github.com/benbovy>`_.
35+
- Add a ``display_values_threshold`` option to control the total number of array
36+
elements which trigger summarization rather than full repr in (numpy) array
37+
detailed views of the html repr (:pull:`6400`).
38+
By `Benoît Bovy <https://github.com/benbovy>`_.
3539

3640
Breaking changes
3741
~~~~~~~~~~~~~~~~
@@ -60,6 +64,8 @@ Bug fixes
6064
- Fixed "unhashable type" error trying to read NetCDF file with variable having its 'units'
6165
attribute not ``str`` (e.g. ``numpy.ndarray``) (:issue:`6368`).
6266
By `Oleh Khoma <https://github.com/okhoma>`_.
67+
- Fixed the poor html repr performance on large multi-indexes (:pull:`6400`).
68+
By `Benoît Bovy <https://github.com/benbovy>`_.
6369
- Allow fancy indexing of duck dask arrays along multiple dimensions. (:pull:`6414`)
6470
By `Justus Magin <https://github.com/keewis>`_.
6571

xarray/core/formatting.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,11 @@ def short_numpy_repr(array):
520520

521521
# default to lower precision so a full (abbreviated) line can fit on
522522
# one line with the default display_width
523-
options = {"precision": 6, "linewidth": OPTIONS["display_width"], "threshold": 200}
523+
options = {
524+
"precision": 6,
525+
"linewidth": OPTIONS["display_width"],
526+
"threshold": OPTIONS["display_values_threshold"],
527+
}
524528
if array.ndim < 3:
525529
edgeitems = 3
526530
elif array.ndim == 3:

xarray/core/indexing.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from contextlib import suppress
66
from dataclasses import dataclass, field
77
from datetime import timedelta
8+
from html import escape
89
from typing import (
910
TYPE_CHECKING,
1011
Any,
@@ -25,6 +26,7 @@
2526

2627
from . import duck_array_ops, nputils, utils
2728
from .npcompat import DTypeLike
29+
from .options import OPTIONS
2830
from .pycompat import dask_version, integer_types, is_duck_dask_array, sparse_array_type
2931
from .types import T_Xarray
3032
from .utils import either_dict_or_kwargs, get_valid_numpy_dtype
@@ -1507,23 +1509,31 @@ def __repr__(self) -> str:
15071509
)
15081510
return f"{type(self).__name__}{props}"
15091511

1510-
def _repr_inline_(self, max_width) -> str:
1511-
# special implementation to speed-up the repr for big multi-indexes
1512+
def _get_array_subset(self) -> np.ndarray:
1513+
# used to speed up the repr for big multi-indexes
1514+
threshold = max(100, OPTIONS["display_values_threshold"] + 2)
1515+
if self.size > threshold:
1516+
pos = threshold // 2
1517+
indices = np.concatenate([np.arange(0, pos), np.arange(-pos, 0)])
1518+
subset = self[OuterIndexer((indices,))]
1519+
else:
1520+
subset = self
1521+
1522+
return np.asarray(subset)
1523+
1524+
def _repr_inline_(self, max_width: int) -> str:
1525+
from .formatting import format_array_flat
1526+
15121527
if self.level is None:
15131528
return "MultiIndex"
15141529
else:
1515-
from .formatting import format_array_flat
1530+
return format_array_flat(self._get_array_subset(), max_width)
15161531

1517-
if self.size > 100 and max_width < self.size:
1518-
n_values = max_width
1519-
indices = np.concatenate(
1520-
[np.arange(0, n_values), np.arange(-n_values, 0)]
1521-
)
1522-
subset = self[OuterIndexer((indices,))]
1523-
else:
1524-
subset = self
1532+
def _repr_html_(self) -> str:
1533+
from .formatting import short_numpy_repr
15251534

1526-
return format_array_flat(np.asarray(subset), max_width)
1535+
array_repr = short_numpy_repr(self._get_array_subset())
1536+
return f"<pre>{escape(array_repr)}</pre>"
15271537

15281538
def copy(self, deep: bool = True) -> "PandasMultiIndexingAdapter":
15291539
# see PandasIndexingAdapter.copy

xarray/core/options.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class T_Options(TypedDict):
1515
cmap_divergent: Union[str, "Colormap"]
1616
cmap_sequential: Union[str, "Colormap"]
1717
display_max_rows: int
18+
display_values_threshold: int
1819
display_style: Literal["text", "html"]
1920
display_width: int
2021
display_expand_attrs: Literal["default", True, False]
@@ -33,6 +34,7 @@ class T_Options(TypedDict):
3334
"cmap_divergent": "RdBu_r",
3435
"cmap_sequential": "viridis",
3536
"display_max_rows": 12,
37+
"display_values_threshold": 200,
3638
"display_style": "html",
3739
"display_width": 80,
3840
"display_expand_attrs": "default",
@@ -57,6 +59,7 @@ def _positive_integer(value):
5759
_VALIDATORS = {
5860
"arithmetic_join": _JOIN_OPTIONS.__contains__,
5961
"display_max_rows": _positive_integer,
62+
"display_values_threshold": _positive_integer,
6063
"display_style": _DISPLAY_OPTIONS.__contains__,
6164
"display_width": _positive_integer,
6265
"display_expand_attrs": lambda choice: choice in [True, False, "default"],
@@ -154,6 +157,9 @@ class set_options:
154157
* ``default`` : to expand unless over a pre-defined limit
155158
display_max_rows : int, default: 12
156159
Maximum display rows.
160+
display_values_threshold : int, default: 200
161+
Total number of array elements which trigger summarization rather
162+
than full repr for variable data views (numpy arrays).
157163
display_style : {"text", "html"}, default: "html"
158164
Display style to use in jupyter for xarray objects.
159165
display_width : int, default: 80

xarray/tests/test_formatting.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,12 @@ def test_short_numpy_repr() -> None:
479479
num_lines = formatting.short_numpy_repr(array).count("\n") + 1
480480
assert num_lines < 30
481481

482+
# threshold option (default: 200)
483+
array = np.arange(100)
484+
assert "..." not in formatting.short_numpy_repr(array)
485+
with xr.set_options(display_values_threshold=10):
486+
assert "..." in formatting.short_numpy_repr(array)
487+
482488

483489
def test_large_array_repr_length() -> None:
484490

0 commit comments

Comments
 (0)