Skip to content

Commit be4b980

Browse files
authored
Limit and format number of displayed dimensions in repr (#5662)
* Truncate dims * better name and no typing * Use limited formatting on dataarrays * limit unindexed dims, code cleanup * typing * typing * typing * typing * typing * handle hashables * Add test for element formatter * Update test_formatting.py * remove the trailing whitespace * Remove trailing whitespaces * Update whats-new.rst * Update whats-new.rst * Move to breaking changes instead * Add typing to tests. * With OPTIONS typed we can add more typing * Fix errors in tests * Update whats-new.rst
1 parent b88c65a commit be4b980

File tree

3 files changed

+139
-19
lines changed

3 files changed

+139
-19
lines changed

doc/whats-new.rst

+3
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ New Features
2727

2828
Breaking changes
2929
~~~~~~~~~~~~~~~~
30+
- Improve repr readability when there are a large number of dimensions in datasets or dataarrays by
31+
wrapping the text once the maximum display width has been exceeded. (:issue:`5546`, :pull:`5662`)
32+
By `Jimmy Westling <https://github.com/illviljan>`_.
3033

3134

3235
Deprecations

xarray/core/formatting.py

+94-11
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import functools
55
from datetime import datetime, timedelta
66
from itertools import chain, zip_longest
7-
from typing import Hashable
7+
from typing import Collection, Hashable, Optional
88

99
import numpy as np
1010
import pandas as pd
@@ -97,6 +97,16 @@ def last_item(array):
9797
return np.ravel(np.asarray(array[indexer])).tolist()
9898

9999

100+
def calc_max_rows_first(max_rows: int) -> int:
    """
    Return how many leading rows to keep for a budget of ``max_rows`` rows.

    The leading part receives the extra row when ``max_rows`` is odd, so
    that this value plus ``max_rows // 2`` adds up to ``max_rows``.
    """
    half, remainder = divmod(max_rows, 2)
    return half + remainder
103+
104+
105+
def calc_max_rows_last(max_rows: int) -> int:
    """
    Return how many trailing rows to keep for a budget of ``max_rows`` rows.

    This is the floor of half of ``max_rows``; for an odd budget the
    trailing part is therefore one row shorter than the rounded-up half.
    """
    half, _ = divmod(max_rows, 2)
    return half
108+
109+
100110
def format_timestamp(t):
101111
"""Cast given object to a Timestamp and return a nicely formatted string"""
102112
# Timestamp is only valid for 1678 to 2262
@@ -384,11 +394,11 @@ def _mapping_repr(
384394
summary = [f"{summary[0]} ({len_mapping})"]
385395
elif max_rows is not None and len_mapping > max_rows:
386396
summary = [f"{summary[0]} ({max_rows}/{len_mapping})"]
387-
first_rows = max_rows // 2 + max_rows % 2
397+
first_rows = calc_max_rows_first(max_rows)
388398
keys = list(mapping.keys())
389399
summary += [summarizer(k, mapping[k], col_width) for k in keys[:first_rows]]
390400
if max_rows > 1:
391-
last_rows = max_rows // 2
401+
last_rows = calc_max_rows_last(max_rows)
392402
summary += [pretty_print(" ...", col_width) + " ..."]
393403
summary += [
394404
summarizer(k, mapping[k], col_width) for k in keys[-last_rows:]
@@ -441,11 +451,74 @@ def dim_summary(obj):
441451
return ", ".join(elements)
442452

443453

444-
def unindexed_dims_repr(dims, coords):
454+
def _element_formatter(
    elements: Collection[Hashable],
    col_width: int,
    max_rows: Optional[int] = None,
    delimiter: str = ", ",
) -> str:
    """
    Join elements into one string, wrapping and truncating rows as needed.

    Elements are appended to the current row until ``col_width`` plus the
    row length would exceed ``OPTIONS["display_width"]``; at that point a
    new row is started, padded to ``col_width``. If ``max_rows`` is given
    and more rows than that were produced, the middle rows are replaced by
    a single ``...`` row.

    Parameters
    ----------
    elements : Collection of hashable
        Elements to join together.
    col_width : int
        The width to indent continuation rows to.
    max_rows : int, optional
        The maximum number of allowed rows. The default is None, which
        disables row truncation.
    delimiter : str, optional
        Delimiter to use between each element. The default is ", ".

    Returns
    -------
    str
        The joined elements, with embedded newlines between rows.
    """
    last_index = len(elements) - 1
    rows = [""]
    row_width = 0
    for index, element in enumerate(elements):
        # The final element is not followed by a delimiter.
        suffix = "" if index >= last_index else delimiter
        piece = f"{element}{suffix}"
        piece_width = len(piece)
        row_width += piece_width

        if col_width + row_width <= OPTIONS["display_width"]:
            # Still fits on the current row.
            rows[-1] += piece
            continue

        # Adding this element would make the row wider than the maximum
        # display width, so close the current row and start a new one.
        rows[-1] = rows[-1].rstrip()  # Remove trailing whitespace.
        rows.append("\n" + pretty_print("", col_width) + piece)
        row_width = piece_width

    # Nothing to trim when no limit is set or the limit is not exceeded.
    if not max_rows or len(rows) <= max_rows:
        return "".join(rows)

    # Too many rows: keep the first and last ones around an ellipsis row.
    head = rows[: calc_max_rows_first(max_rows)]
    ellipsis_row = "\n" + pretty_print("", col_width) + "..."
    tail = rows[-calc_max_rows_last(max_rows) :] if max_rows > 1 else []
    return "".join(head + [ellipsis_row] + tail)
507+
508+
509+
def dim_summary_limited(obj, col_width: int, max_rows: Optional[int] = None) -> str:
    """
    Summarize the sizes of ``obj`` as wrapped, row-limited text.

    Each entry of ``obj.sizes`` is rendered as ``"<name>: <size>"`` and the
    entries are then joined by ``_element_formatter`` using ``col_width``
    and ``max_rows``.
    """
    size_labels = [f"{name}: {size}" for name, size in obj.sizes.items()]
    return _element_formatter(size_labels, col_width, max_rows)
512+
513+
514+
def unindexed_dims_repr(dims, coords, max_rows: Optional[int] = None):
    """
    Describe the dimensions that have no entry in ``coords``.

    Returns a string starting with "Dimensions without coordinates: "
    followed by the affected dimension names, formatted by
    ``_element_formatter`` (continuation rows are indented to the width of
    that prefix and limited by ``max_rows``). Returns None when every
    dimension is found in ``coords``.
    """
    unindexed_dims = [d for d in dims if d not in coords]
    if not unindexed_dims:
        return None

    prefix = "Dimensions without coordinates: "
    listing = _element_formatter(
        unindexed_dims, col_width=len(prefix), max_rows=max_rows
    )
    return prefix + listing
451524

@@ -505,6 +578,8 @@ def short_data_repr(array):
505578
def array_repr(arr):
506579
from .variable import Variable
507580

581+
max_rows = OPTIONS["display_max_rows"]
582+
508583
# used for DataArray, Variable and IndexVariable
509584
if hasattr(arr, "name") and arr.name is not None:
510585
name_str = f"{arr.name!r} "
@@ -520,16 +595,23 @@ def array_repr(arr):
520595
else:
521596
data_repr = inline_variable_array_repr(arr.variable, OPTIONS["display_width"])
522597

598+
start = f"<xarray.{type(arr).__name__} {name_str}"
599+
dims = dim_summary_limited(arr, col_width=len(start) + 1, max_rows=max_rows)
523600
summary = [
524-
"<xarray.{} {}({})>".format(type(arr).__name__, name_str, dim_summary(arr)),
601+
f"{start}({dims})>",
525602
data_repr,
526603
]
527604

528605
if hasattr(arr, "coords"):
529606
if arr.coords:
530-
summary.append(repr(arr.coords))
607+
col_width = _calculate_col_width(_get_col_items(arr.coords))
608+
summary.append(
609+
coords_repr(arr.coords, col_width=col_width, max_rows=max_rows)
610+
)
531611

532-
unindexed_dims_str = unindexed_dims_repr(arr.dims, arr.coords)
612+
unindexed_dims_str = unindexed_dims_repr(
613+
arr.dims, arr.coords, max_rows=max_rows
614+
)
533615
if unindexed_dims_str:
534616
summary.append(unindexed_dims_str)
535617

@@ -546,12 +628,13 @@ def dataset_repr(ds):
546628
max_rows = OPTIONS["display_max_rows"]
547629

548630
dims_start = pretty_print("Dimensions:", col_width)
549-
summary.append("{}({})".format(dims_start, dim_summary(ds)))
631+
dims_values = dim_summary_limited(ds, col_width=col_width + 1, max_rows=max_rows)
632+
summary.append(f"{dims_start}({dims_values})")
550633

551634
if ds.coords:
552635
summary.append(coords_repr(ds.coords, col_width=col_width, max_rows=max_rows))
553636

554-
unindexed_dims_str = unindexed_dims_repr(ds.dims, ds.coords)
637+
unindexed_dims_str = unindexed_dims_repr(ds.dims, ds.coords, max_rows=max_rows)
555638
if unindexed_dims_str:
556639
summary.append(unindexed_dims_str)
557640

xarray/tests/test_formatting.py

+42-8
Original file line numberDiff line numberDiff line change
@@ -552,18 +552,52 @@ def test__mapping_repr(display_max_rows, n_vars, n_attr) -> None:
552552
assert len_summary == n_vars
553553

554554
with xr.set_options(
555+
display_max_rows=display_max_rows,
555556
display_expand_coords=False,
556557
display_expand_data_vars=False,
557558
display_expand_attrs=False,
558559
):
559560
actual = formatting.dataset_repr(ds)
560-
coord_s = ", ".join([f"{c}: {len(v)}" for c, v in coords.items()])
561-
expected = dedent(
562-
f"""\
563-
<xarray.Dataset>
564-
Dimensions: ({coord_s})
565-
Coordinates: ({n_vars})
566-
Data variables: ({n_vars})
567-
Attributes: ({n_attr})"""
561+
col_width = formatting._calculate_col_width(
562+
formatting._get_col_items(ds.variables)
563+
)
564+
dims_start = formatting.pretty_print("Dimensions:", col_width)
565+
dims_values = formatting.dim_summary_limited(
566+
ds, col_width=col_width + 1, max_rows=display_max_rows
568567
)
568+
expected = f"""\
569+
<xarray.Dataset>
570+
{dims_start}({dims_values})
571+
Coordinates: ({n_vars})
572+
Data variables: ({n_vars})
573+
Attributes: ({n_attr})"""
574+
expected = dedent(expected)
569575
assert actual == expected
576+
577+
578+
def test__element_formatter(n_elements: int = 100) -> None:
# Checks the output of _element_formatter for n_elements "dim_<k>: 3"
# entries with max_rows=12: six leading rows, one "..." row, and six
# trailing rows are expected.
# NOTE(review): the continuation rows of `expected` presumably carry leading
# indentation that is not visible in this rendering - confirm against the
# repository file before relying on the literal below.
579+
expected = """\
580+
Dimensions without coordinates: dim_0: 3, dim_1: 3, dim_2: 3, dim_3: 3,
581+
dim_4: 3, dim_5: 3, dim_6: 3, dim_7: 3,
582+
dim_8: 3, dim_9: 3, dim_10: 3, dim_11: 3,
583+
dim_12: 3, dim_13: 3, dim_14: 3, dim_15: 3,
584+
dim_16: 3, dim_17: 3, dim_18: 3, dim_19: 3,
585+
dim_20: 3, dim_21: 3, dim_22: 3, dim_23: 3,
586+
...
587+
dim_76: 3, dim_77: 3, dim_78: 3, dim_79: 3,
588+
dim_80: 3, dim_81: 3, dim_82: 3, dim_83: 3,
589+
dim_84: 3, dim_85: 3, dim_86: 3, dim_87: 3,
590+
dim_88: 3, dim_89: 3, dim_90: 3, dim_91: 3,
591+
dim_92: 3, dim_93: 3, dim_94: 3, dim_95: 3,
592+
dim_96: 3, dim_97: 3, dim_98: 3, dim_99: 3"""
593+
expected = dedent(expected)
594+
595+
# Build the same "name: size" strings that are passed to the formatter, and
# use the length of the intro text as the continuation-row indent.
intro = "Dimensions without coordinates: "
596+
elements = [
597+
f"{k}: {v}" for k, v in {f"dim_{k}": 3 for k in np.arange(n_elements)}.items()
598+
]
599+
values = xr.core.formatting._element_formatter(
600+
elements, col_width=len(intro), max_rows=12
601+
)
602+
actual = intro + values
603+
assert expected == actual

0 commit comments

Comments
 (0)