Refactor EA formatting

Tom Augspurger · Tom Augspurger · commit 38858510d154 · 2021-11-19T11:25:12.000-06:00
Moves responsibility for converting EAs to a List[str] from
pandas.io.formats to a method on the EA.
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -29,6 +29,7 @@
     AstypeArg,
     Dtype,
     FillnaOptions,
+    FloatFormatType,
     PositionalIndexer,
     ScalarIndexer,
     SequenceIndexer,
@@ -137,6 +138,7 @@ class ExtensionArray:
     view
     _concat_same_type
     _formatter
+    _format_array
     _from_factorized
     _from_sequence
     _from_sequence_of_strings
@@ -167,6 +169,8 @@ class ExtensionArray:
 
     * __repr__ : A default repr for the ExtensionArray.
     * _formatter : Print scalars inside a Series or DataFrame.
+    * _format_array: Full control over formatting an ExtensionArray
+      to be included in a Series or DataFrame.
 
     Some methods require casting the ExtensionArray to an ndarray of Python
     objects with ``self.astype(object)``, which may be expensive. When
@@ -1232,6 +1236,76 @@ def _repr_2d(self) -> str:
         class_name = f"<{type(self).__name__}>"
         return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
 
+    def _format_array(
+        self,
+        formatter: Callable | None,
+        float_format: FloatFormatType = None,
+        na_rep: str = "NaN",
+        digits: int = None,
+        space: str | int = None,
+        justify: str = "right",
+        decimal: str = ".",
+        leading_space: bool | None = True,
+        quoting: int | None = None,
+    ) -> list[str]:
+        """
+        Format an array of values.
+
+        Parameters
+        ----------
+        formatter : Callable, optional
+            The function to apply to each element of the array to convert it
+            to a string. If None, the array's ``_formatter(boxed=True)`` is used.
+        float_format : FloatFormatType, optional
+        na_rep : str, default "NaN"
+        digits : int, optional
+        space : str or int, optional
+        justify : str, default "right"
+        decimal : str, default "."
+        leading_space : bool, optional, default True
+            Whether the array should be formatted with a leading space.
+            When an array is a column of a Series or DataFrame, we do want
+            the leading space to pad between columns.
+
+            When formatting an Index subclass
+            (e.g. IntervalIndex._format_native_types), we don't want the
+            leading space since it should be left-aligned.
+        quoting : int, optional
+            Passed through unchanged to ``format_array``, like the options above.
+        """
+        from pandas import Categorical
+        from pandas.core.construction import extract_array
+
+        from pandas.io.formats.format import format_array
+
+        # The result may be an ndarray rather than an ExtensionArray (see below).
+        values = extract_array(self, extract_numpy=True)
+
+        if formatter is None:
+            # error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has
+            # no attribute "_formatter"
+            formatter = values._formatter(boxed=True)  # type: ignore[union-attr]
+
+        if isinstance(values, Categorical):
+            # Categorical is special for now, so that we can preserve tzinfo
+            array = values._internal_get_values()
+        else:
+            array = np.asarray(values)
+
+        fmt_values = format_array(  # re-dispatch on the converted (non-EA) array
+            array,
+            formatter,
+            float_format=float_format,
+            na_rep=na_rep,
+            digits=digits,
+            space=space,
+            justify=justify,
+            decimal=decimal,
+            leading_space=leading_space,
+            quoting=quoting,
+        )
+        return fmt_values
+
     def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
         """
         Formatting function for scalar values.
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -8,6 +8,7 @@
 )
 from typing import (
     TYPE_CHECKING,
+    Callable,
     Literal,
     overload,
 )
@@ -37,7 +38,10 @@
     to_offset,
     tzconversion,
 )
-from pandas._typing import npt
+from pandas._typing import (
+    FloatFormatType,
+    npt,
+)
 from pandas.errors import PerformanceWarning
 from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import validate_inclusive
@@ -681,6 +685,43 @@ def _format_native_types(
             self.asi8, tz=self.tz, format=fmt, na_rep=na_rep
         )
 
+    def _format_array(
+        self,
+        formatter: Callable | None,
+        float_format: FloatFormatType = None,
+        na_rep: str = "NaN",
+        digits: int = None,
+        space: str | int = None,
+        justify: str = "right",
+        decimal: str = ".",
+        leading_space: bool | None = True,
+        quoting: int | None = None,
+    ) -> list[str]:
+        from pandas.io.formats.format import (
+            Datetime64Formatter,
+            Datetime64TZFormatter,
+        )
+        # Choose the formatter class by whether this array's dtype is tz-aware.
+        if is_datetime64tz_dtype(self.dtype):
+            fmt_klass = Datetime64TZFormatter
+        else:
+            fmt_klass = Datetime64Formatter
+        # Every option is handed to the formatter unchanged; it does the work.
+        fmt_obj = fmt_klass(
+            self,
+            digits=digits,
+            na_rep=na_rep,
+            float_format=float_format,
+            formatter=formatter,
+            space=space,
+            justify=justify,
+            decimal=decimal,
+            leading_space=leading_space,
+            quoting=quoting,
+        )
+
+        return fmt_obj.get_result()
+
     # -----------------------------------------------------------------
     # Comparison Methods
 
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -30,6 +30,7 @@
     parsing,
     to_offset,
 )
+from pandas._typing import FloatFormatType
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import (
     Appender,
@@ -189,6 +190,30 @@ def format(
 
         return self._format_with_header(header, na_rep=na_rep, date_format=date_format)
 
+    def _format_array(
+        self,
+        formatter: Callable | None,
+        float_format: FloatFormatType = None,
+        na_rep: str = "NaN",
+        digits: int = None,
+        space: str | int = None,
+        justify: str = "right",
+        decimal: str = ".",
+        leading_space: bool | None = True,
+        quoting: int | None = None,
+    ):
+        return self.array._format_array(  # delegate to self.array, options as-is
+            formatter=formatter,
+            float_format=float_format,
+            na_rep=na_rep,
+            digits=digits,
+            space=space,
+            justify=justify,
+            decimal=decimal,
+            leading_space=leading_space,
+            quoting=quoting,
+        )
+
     def _format_with_header(
         self, header: list[str], na_rep: str = "NaT", date_format: str | None = None
     ) -> list[str]:
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1276,30 +1276,41 @@ def format_array(
     List[str]
     """
     fmt_klass: type[GenericArrayFormatter]
-    if is_datetime64_dtype(values.dtype):
+
+    if space is None:
+        space = get_option("display.column_space")
+
+    if float_format is None:
+        float_format = get_option("display.float_format")
+
+    if digits is None:
+        digits = get_option("display.precision")
+
+    if is_extension_array_dtype(values):
+        return values._format_array(
+            formatter,
+            float_format,
+            na_rep,
+            digits,
+            space,
+            justify,
+            decimal,
+            leading_space,
+            quoting,
+        )
+    elif is_datetime64_dtype(values.dtype):
         fmt_klass = Datetime64Formatter
     elif is_datetime64tz_dtype(values.dtype):
         fmt_klass = Datetime64TZFormatter
     elif is_timedelta64_dtype(values.dtype):
         fmt_klass = Timedelta64Formatter
-    elif is_extension_array_dtype(values.dtype):
-        fmt_klass = ExtensionArrayFormatter
     elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype):
         fmt_klass = FloatArrayFormatter
     elif is_integer_dtype(values.dtype):
         fmt_klass = IntArrayFormatter
     else:
         fmt_klass = GenericArrayFormatter
 
-    if space is None:
-        space = get_option("display.column_space")
-
-    if float_format is None:
-        float_format = get_option("display.float_format")
-
-    if digits is None:
-        digits = get_option("display.precision")
-
     fmt_obj = fmt_klass(
         values,
         digits=digits,
@@ -1633,37 +1644,6 @@ def _format_strings(self) -> list[str]:
         return fmt_values.tolist()
 
 
-class ExtensionArrayFormatter(GenericArrayFormatter):
-    def _format_strings(self) -> list[str]:
-        values = extract_array(self.values, extract_numpy=True)
-
-        formatter = self.formatter
-        if formatter is None:
-            # error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has
-            # no attribute "_formatter"
-            formatter = values._formatter(boxed=True)  # type: ignore[union-attr]
-
-        if isinstance(values, Categorical):
-            # Categorical is special for now, so that we can preserve tzinfo
-            array = values._internal_get_values()
-        else:
-            array = np.asarray(values)
-
-        fmt_values = format_array(
-            array,
-            formatter,
-            float_format=self.float_format,
-            na_rep=self.na_rep,
-            digits=self.digits,
-            space=self.space,
-            justify=self.justify,
-            decimal=self.decimal,
-            leading_space=self.leading_space,
-            quoting=self.quoting,
-        )
-        return fmt_values
-
-
 def format_percentiles(
     percentiles: (np.ndarray | list[int | float] | list[float] | list[str | float]),
 ) -> list[str]: