-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Fix/26837 format array #44527
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix/26837 format array #44527
Changes from all commits
3885851
a7ec260
6a03c9a
f84867b
9bcd61e
83cfe06
e33b675
42ea58b
2c8bceb
e1f2cb3
8dcb3e5
04f590b
fb063f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ | |
AstypeArg, | ||
Dtype, | ||
FillnaOptions, | ||
FloatFormatType, | ||
PositionalIndexer, | ||
ScalarIndexer, | ||
SequenceIndexer, | ||
|
@@ -137,6 +138,7 @@ class ExtensionArray: | |
view | ||
_concat_same_type | ||
_formatter | ||
_format_array | ||
_from_factorized | ||
_from_sequence | ||
_from_sequence_of_strings | ||
|
@@ -167,6 +169,8 @@ class ExtensionArray: | |
|
||
* __repr__ : A default repr for the ExtensionArray. | ||
* _formatter : Print scalars inside a Series or DataFrame. | ||
* _format_array : Full control over formatting an ExtensionArray | ||
to be included in a Series or DataFrame. | ||
|
||
Some methods require casting the ExtensionArray to an ndarray of Python | ||
objects with ``self.astype(object)``, which may be expensive. When | ||
|
@@ -1232,6 +1236,105 @@ def _repr_2d(self) -> str: | |
class_name = f"<{type(self).__name__}>" | ||
return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}" | ||
|
||
def _format_array( | ||
self, | ||
formatter: Callable | None, | ||
*, | ||
float_format: FloatFormatType, | ||
na_rep: str = "NaN", | ||
digits: int, | ||
space: str | int, | ||
justify: str = "right", | ||
decimal: str = ".", | ||
leading_space: bool | None = True, | ||
quoting: int | None = None, | ||
) -> list[str]: | ||
""" | ||
Format an array of values. | ||
|
||
This is called from both the Series and DataFrame reprs. By default, | ||
the ExtensionArray is converted to a NumPy array and formatted using | ||
pandas' normal formatting methods. | ||
|
||
.. versionadded:: 1.4.0 | ||
|
||
Parameters | ||
---------- | ||
formatter : Callable, optional | ||
The function to apply to each element of the array to convert it | ||
to a string. By default, `self._formatter` is used. | ||
float_format : one-parameter function, optional, default None | ||
Formatter function to apply to columns' elements if they are | ||
floats. This function must return a unicode string and will be | ||
applied only to the non-``NaN`` elements, with ``NaN`` being | ||
handled by ``na_rep``. | ||
na_rep : str, optional, default 'NaN' | ||
String representation of ``NaN`` to use. | ||
digits : int, optional | ||
Display precision in terms of decimal places. Defaults to | ||
``pandas.options.display.precision``. | ||
space : int, optional | ||
Defaults to ``pandas.options.display.column_space``. | ||
justify : str, default 'right' | ||
How to justify the column labels. If None, uses the option from | ||
the print configuration (controlled by set_option), 'right' out | ||
of the box. Valid values are | ||
|
||
* left | ||
* right | ||
* center | ||
* justify | ||
* justify-all | ||
* start | ||
* end | ||
* inherit | ||
* match-parent | ||
* initial | ||
* unset. | ||
|
||
decimal : str, default '.' | ||
Character recognized as decimal separator, e.g. ',' in Europe. | ||
|
||
leading_space : bool, optional, default True | ||
Whether the array should be formatted with a leading space. | ||
When an array is a column of a Series or DataFrame, we do want | ||
the leading space to pad between columns. | ||
|
||
When formatting an Index subclass | ||
(e.g. IntervalIndex._format_native_types), we don't want the | ||
leading space since it should be left-aligned. | ||
|
||
Returns | ||
------- | ||
list[str] | ||
The list of formatted values for the array. | ||
""" | ||
from pandas.core.construction import extract_array | ||
|
||
from pandas.io.formats.format import format_array | ||
|
||
values = extract_array(self, extract_numpy=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why would we need to extract_array? I guess for PandasArray? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That's my guess too. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you check if it's really necessary and, if so, add a comment as to why? |
||
|
||
if formatter is None: | ||
# error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has | ||
# no attribute "_formatter" | ||
formatter = values._formatter(boxed=True) # type: ignore[union-attr] | ||
|
||
array = np.asarray(values) | ||
fmt_values = format_array( | ||
array, | ||
formatter, | ||
float_format=float_format, | ||
na_rep=na_rep, | ||
digits=digits, | ||
space=space, | ||
justify=justify, | ||
decimal=decimal, | ||
leading_space=leading_space, | ||
quoting=quoting, | ||
) | ||
return fmt_values | ||
|
||
def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: | ||
""" | ||
Formatting function for scalar values. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
) | ||
from typing import ( | ||
TYPE_CHECKING, | ||
Callable, | ||
Literal, | ||
) | ||
import warnings | ||
|
@@ -36,7 +37,10 @@ | |
to_offset, | ||
tzconversion, | ||
) | ||
from pandas._typing import npt | ||
from pandas._typing import ( | ||
FloatFormatType, | ||
npt, | ||
) | ||
from pandas.errors import PerformanceWarning | ||
from pandas.util._exceptions import find_stack_level | ||
from pandas.util._validators import validate_inclusive | ||
|
@@ -680,6 +684,46 @@ def _format_native_types( | |
self.asi8, tz=self.tz, format=fmt, na_rep=na_rep | ||
) | ||
|
||
def _format_array( | ||
self, | ||
formatter: Callable | None, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not too familiar with how this gets reached. Is, e.g., formatter always going to be self._formatter? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you're right. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Then can we do without the formatter arg? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I was incorrect. |
||
*, | ||
float_format: FloatFormatType, | ||
na_rep: str = "NaN", | ||
digits: int, | ||
space: str | int, | ||
justify: str = "right", | ||
decimal: str = ".", | ||
leading_space: bool | None = True, | ||
quoting: int | None = None, | ||
) -> list[str]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment this seems like adding a lot of boilerplate that could be handled in the base class no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is slightly different than the Categorical case. Categorical wants to change the We could add some method to the interface to get the formatting class for an array. I don't really think that we want to publicly expose the ArrayFormatter interface publicly though. If it's purely about the lines of code here, we could have a "private" # in ExtensionArray._format_array
if hasattr(self, "_format_class"):
fmt_klass = self._format_class
else:
fmt_klass = GenericArrayFormatter I dunno. This is all kind of messy. |
||
from pandas.io.formats.format import ( | ||
Datetime64Formatter, | ||
Datetime64TZFormatter, | ||
) | ||
|
||
fmt_klass: type[Datetime64Formatter] | type[Datetime64TZFormatter] | ||
|
||
if is_datetime64tz_dtype(self.dtype): | ||
fmt_klass = Datetime64TZFormatter | ||
else: | ||
fmt_klass = Datetime64Formatter | ||
|
||
fmt_obj = fmt_klass( | ||
self, | ||
digits=digits, | ||
na_rep=na_rep, | ||
float_format=float_format, | ||
formatter=formatter, | ||
space=space, | ||
justify=justify, | ||
decimal=decimal, | ||
leading_space=leading_space, | ||
quoting=quoting, | ||
) | ||
|
||
return fmt_obj.get_result() | ||
|
||
# ----------------------------------------------------------------- | ||
# Comparison Methods | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1276,30 +1276,43 @@ def format_array( | |
List[str] | ||
""" | ||
fmt_klass: type[GenericArrayFormatter] | ||
if is_datetime64_dtype(values.dtype): | ||
|
||
if space is None: | ||
space = get_option("display.column_space") | ||
|
||
if float_format is None: | ||
float_format = get_option("display.float_format") | ||
|
||
if digits is None: | ||
digits = get_option("display.precision") | ||
|
||
values = extract_array(values, extract_numpy=True) | ||
|
||
if is_extension_array_dtype(values): | ||
return values._format_array( | ||
formatter, | ||
float_format=float_format, | ||
na_rep=na_rep, | ||
digits=digits, | ||
space=space, | ||
justify=justify, | ||
decimal=decimal, | ||
leading_space=leading_space, | ||
quoting=quoting, | ||
) | ||
elif is_datetime64_dtype(values.dtype): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I should check to see if these datetimelike branches are actually hit anymore. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yah. I expect that if you use |
||
fmt_klass = Datetime64Formatter | ||
elif is_datetime64tz_dtype(values.dtype): | ||
fmt_klass = Datetime64TZFormatter | ||
elif is_timedelta64_dtype(values.dtype): | ||
fmt_klass = Timedelta64Formatter | ||
elif is_extension_array_dtype(values.dtype): | ||
fmt_klass = ExtensionArrayFormatter | ||
elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype): | ||
fmt_klass = FloatArrayFormatter | ||
elif is_integer_dtype(values.dtype): | ||
fmt_klass = IntArrayFormatter | ||
else: | ||
fmt_klass = GenericArrayFormatter | ||
|
||
if space is None: | ||
space = get_option("display.column_space") | ||
|
||
if float_format is None: | ||
float_format = get_option("display.float_format") | ||
|
||
if digits is None: | ||
digits = get_option("display.precision") | ||
|
||
fmt_obj = fmt_klass( | ||
values, | ||
digits=digits, | ||
|
@@ -1633,37 +1646,6 @@ def _format_strings(self) -> list[str]: | |
return fmt_values.tolist() | ||
|
||
|
||
class ExtensionArrayFormatter(GenericArrayFormatter): | ||
def _format_strings(self) -> list[str]: | ||
values = extract_array(self.values, extract_numpy=True) | ||
|
||
formatter = self.formatter | ||
if formatter is None: | ||
# error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has | ||
# no attribute "_formatter" | ||
formatter = values._formatter(boxed=True) # type: ignore[union-attr] | ||
|
||
if isinstance(values, Categorical): | ||
# Categorical is special for now, so that we can preserve tzinfo | ||
array = values._internal_get_values() | ||
else: | ||
array = np.asarray(values) | ||
|
||
fmt_values = format_array( | ||
array, | ||
formatter, | ||
float_format=self.float_format, | ||
na_rep=self.na_rep, | ||
digits=self.digits, | ||
space=self.space, | ||
justify=self.justify, | ||
decimal=self.decimal, | ||
leading_space=self.leading_space, | ||
quoting=self.quoting, | ||
) | ||
return fmt_values | ||
|
||
|
||
def format_percentiles( | ||
percentiles: (np.ndarray | list[int | float] | list[float] | list[str | float]), | ||
) -> list[str]: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we take this opportunity to make a FormatOptionsType that has all of these defaults?