Skip to content

Commit 3885851

Browse files
author
Tom Augspurger
committed
Refactor EA formatting
Moves responsibility for converting EAs to a List[str] from pandas.io.formats to a method on the EA.
1 parent 59d031f commit 3885851

File tree

4 files changed

+164
-44
lines changed

4 files changed

+164
-44
lines changed

pandas/core/arrays/base.py

+74
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
AstypeArg,
3030
Dtype,
3131
FillnaOptions,
32+
FloatFormatType,
3233
PositionalIndexer,
3334
ScalarIndexer,
3435
SequenceIndexer,
@@ -137,6 +138,7 @@ class ExtensionArray:
137138
view
138139
_concat_same_type
139140
_formatter
141+
_format_array
140142
_from_factorized
141143
_from_sequence
142144
_from_sequence_of_strings
@@ -167,6 +169,8 @@ class ExtensionArray:
167169
168170
* __repr__ : A default repr for the ExtensionArray.
169171
* _formatter : Print scalars inside a Series or DataFrame.
172+
* _format_array : Full control over formatting an ExtensionArray
173+
to be included in a Series or DataFrame.
170174
171175
Some methods require casting the ExtensionArray to an ndarray of Python
172176
objects with ``self.astype(object)``, which may be expensive. When
@@ -1232,6 +1236,76 @@ def _repr_2d(self) -> str:
12321236
class_name = f"<{type(self).__name__}>"
12331237
return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
12341238

1239+
def _format_array(
1240+
self,
1241+
formatter: Callable | None,
1242+
float_format: FloatFormatType = None,
1243+
na_rep: str = "NaN",
1244+
digits: int = None,
1245+
space: str | int = None,
1246+
justify: str = "right",
1247+
decimal: str = ".",
1248+
leading_space: bool | None = True,
1249+
quoting: int | None = None,
1250+
) -> list[str]:
1251+
"""
1252+
Format an array of of values.
1253+
1254+
Parameters
1255+
----------
1256+
formatter : Callable, optional
1257+
The function to apply to each element of the array to convert it
1258+
to a string. By default, `self._formatter` is used.
1259+
float_format
1260+
na_rep
1261+
digits
1262+
space
1263+
justify
1264+
decimal
1265+
leading_space : bool, optional, default True
1266+
Whether the array should be formatted with a leading space.
1267+
When an array as a column of a Series or DataFrame, we do want
1268+
the leading space to pad between columns.
1269+
1270+
When formatting an Index subclass
1271+
(e.g. IntervalIndex._format_native_types), we don't want the
1272+
leading space since it should be left-aligned.
1273+
1274+
1275+
"""
1276+
from pandas import Categorical
1277+
from pandas.core.construction import extract_array
1278+
1279+
from pandas.io.formats.format import format_array
1280+
1281+
# values = self
1282+
values = extract_array(self, extract_numpy=True)
1283+
1284+
if formatter is None:
1285+
# error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has
1286+
# no attribute "_formatter"
1287+
formatter = values._formatter(boxed=True) # type: ignore[union-attr]
1288+
1289+
if isinstance(values, Categorical):
1290+
# Categorical is special for now, so that we can preserve tzinfo
1291+
array = values._internal_get_values()
1292+
else:
1293+
array = np.asarray(values)
1294+
1295+
fmt_values = format_array(
1296+
array,
1297+
formatter,
1298+
float_format=float_format,
1299+
na_rep=na_rep,
1300+
digits=digits,
1301+
space=space,
1302+
justify=justify,
1303+
decimal=decimal,
1304+
leading_space=leading_space,
1305+
quoting=quoting,
1306+
)
1307+
return fmt_values
1308+
12351309
def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
12361310
"""
12371311
Formatting function for scalar values.

pandas/core/arrays/datetimes.py

+42-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
)
99
from typing import (
1010
TYPE_CHECKING,
11+
Callable,
1112
Literal,
1213
overload,
1314
)
@@ -37,7 +38,10 @@
3738
to_offset,
3839
tzconversion,
3940
)
40-
from pandas._typing import npt
41+
from pandas._typing import (
42+
FloatFormatType,
43+
npt,
44+
)
4145
from pandas.errors import PerformanceWarning
4246
from pandas.util._exceptions import find_stack_level
4347
from pandas.util._validators import validate_inclusive
@@ -681,6 +685,43 @@ def _format_native_types(
681685
self.asi8, tz=self.tz, format=fmt, na_rep=na_rep
682686
)
683687

688+
def _format_array(
689+
self,
690+
formatter: Callable | None,
691+
float_format: FloatFormatType = None,
692+
na_rep: str = "NaN",
693+
digits: int = None,
694+
space: str | int = None,
695+
justify: str = "right",
696+
decimal: str = ".",
697+
leading_space: bool | None = True,
698+
quoting: int | None = None,
699+
) -> list[str]:
700+
from pandas.io.formats.format import (
701+
Datetime64Formatter,
702+
Datetime64TZFormatter,
703+
)
704+
705+
if is_datetime64tz_dtype(self.dtype):
706+
fmt_klass = Datetime64TZFormatter
707+
else:
708+
fmt_klass = Datetime64Formatter
709+
710+
fmt_obj = fmt_klass(
711+
self,
712+
digits=digits,
713+
na_rep=na_rep,
714+
float_format=float_format,
715+
formatter=formatter,
716+
space=space,
717+
justify=justify,
718+
decimal=decimal,
719+
leading_space=leading_space,
720+
quoting=quoting,
721+
)
722+
723+
return fmt_obj.get_result()
724+
684725
# -----------------------------------------------------------------
685726
# Comparison Methods
686727

pandas/core/indexes/datetimelike.py

+25
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
parsing,
3131
to_offset,
3232
)
33+
from pandas._typing import FloatFormatType
3334
from pandas.compat.numpy import function as nv
3435
from pandas.util._decorators import (
3536
Appender,
@@ -189,6 +190,30 @@ def format(
189190

190191
return self._format_with_header(header, na_rep=na_rep, date_format=date_format)
191192

193+
def _format_array(
194+
self,
195+
formatter: Callable | None,
196+
float_format: FloatFormatType = None,
197+
na_rep: str = "NaN",
198+
digits: int = None,
199+
space: str | int = None,
200+
justify: str = "right",
201+
decimal: str = ".",
202+
leading_space: bool | None = True,
203+
quoting: int | None = None,
204+
):
205+
return self.array._format_array(
206+
formatter=formatter,
207+
float_format=float_format,
208+
na_rep=na_rep,
209+
digits=digits,
210+
space=space,
211+
justify=justify,
212+
decimal=decimal,
213+
leading_space=leading_space,
214+
quoting=quoting,
215+
)
216+
192217
def _format_with_header(
193218
self, header: list[str], na_rep: str = "NaT", date_format: str | None = None
194219
) -> list[str]:

pandas/io/formats/format.py

+23-43
Original file line numberDiff line numberDiff line change
@@ -1276,30 +1276,41 @@ def format_array(
12761276
List[str]
12771277
"""
12781278
fmt_klass: type[GenericArrayFormatter]
1279-
if is_datetime64_dtype(values.dtype):
1279+
1280+
if space is None:
1281+
space = get_option("display.column_space")
1282+
1283+
if float_format is None:
1284+
float_format = get_option("display.float_format")
1285+
1286+
if digits is None:
1287+
digits = get_option("display.precision")
1288+
1289+
if is_extension_array_dtype(values):
1290+
return values._format_array(
1291+
formatter,
1292+
float_format,
1293+
na_rep,
1294+
digits,
1295+
space,
1296+
justify,
1297+
decimal,
1298+
leading_space,
1299+
quoting,
1300+
)
1301+
elif is_datetime64_dtype(values.dtype):
12801302
fmt_klass = Datetime64Formatter
12811303
elif is_datetime64tz_dtype(values.dtype):
12821304
fmt_klass = Datetime64TZFormatter
12831305
elif is_timedelta64_dtype(values.dtype):
12841306
fmt_klass = Timedelta64Formatter
1285-
elif is_extension_array_dtype(values.dtype):
1286-
fmt_klass = ExtensionArrayFormatter
12871307
elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype):
12881308
fmt_klass = FloatArrayFormatter
12891309
elif is_integer_dtype(values.dtype):
12901310
fmt_klass = IntArrayFormatter
12911311
else:
12921312
fmt_klass = GenericArrayFormatter
12931313

1294-
if space is None:
1295-
space = get_option("display.column_space")
1296-
1297-
if float_format is None:
1298-
float_format = get_option("display.float_format")
1299-
1300-
if digits is None:
1301-
digits = get_option("display.precision")
1302-
13031314
fmt_obj = fmt_klass(
13041315
values,
13051316
digits=digits,
@@ -1633,37 +1644,6 @@ def _format_strings(self) -> list[str]:
16331644
return fmt_values.tolist()
16341645

16351646

1636-
class ExtensionArrayFormatter(GenericArrayFormatter):
    """Array formatter for values backed by an ExtensionArray."""

    def _format_strings(self) -> list[str]:
        """
        Convert ``self.values`` to strings via ``format_array``.

        The values are unwrapped with ``extract_array``, turned into an
        array that ``format_array`` can dispatch on, and formatted with
        this formatter's own display options.
        """
        unwrapped = extract_array(self.values, extract_numpy=True)

        element_formatter = self.formatter
        if element_formatter is None:
            # error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has
            # no attribute "_formatter"
            element_formatter = unwrapped._formatter(  # type: ignore[union-attr]
                boxed=True
            )

        # Categorical is special for now, so that we can preserve tzinfo
        materialized = (
            unwrapped._internal_get_values()
            if isinstance(unwrapped, Categorical)
            else np.asarray(unwrapped)
        )

        display_options = {
            "float_format": self.float_format,
            "na_rep": self.na_rep,
            "digits": self.digits,
            "space": self.space,
            "justify": self.justify,
            "decimal": self.decimal,
            "leading_space": self.leading_space,
            "quoting": self.quoting,
        }
        return format_array(materialized, element_formatter, **display_options)
1665-
1666-
16671647
def format_percentiles(
16681648
percentiles: (np.ndarray | list[int | float] | list[float] | list[str | float]),
16691649
) -> list[str]:

0 commit comments

Comments
 (0)