pandas-dev · TomAugspurger · Nov 16, 2021 · Nov 19, 2021 · Nov 19, 2021 · Nov 20, 2021
diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst
@@ -33,6 +33,7 @@ objects.
       :toctree: api/
 
       api.extensions.ExtensionArray._concat_same_type
+      api.extensions.ExtensionArray._format_array
       api.extensions.ExtensionArray._formatter
       api.extensions.ExtensionArray._from_factorized
       api.extensions.ExtensionArray._from_sequence

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -204,6 +204,7 @@ Other enhancements
 - :meth:`IntegerArray.all` , :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`)
 - Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`)
 - :meth:`DataFrame.__pos__`, :meth:`DataFrame.__neg__` now retain ``ExtensionDtype`` dtypes (:issue:`43883`)
+- Added :meth:`api.extensions.ExtensionArray._format_array` for extension arrays to control how they are formatted in ``Series`` and ``DataFrame`` (:issue:`26837`)
 - The error raised when an optional dependency can't be imported now includes the original exception, for easier investigation (:issue:`43882`)
 - Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
 - :meth:`Series.str.split` now supports a ``regex`` argument that explicitly specifies whether the pattern is a regular expression. Default is ``None`` (:issue:`43563`, :issue:`32835`, :issue:`25549`)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -29,6 +29,7 @@
     AstypeArg,
     Dtype,
     FillnaOptions,
+    FloatFormatType,
     PositionalIndexer,
     ScalarIndexer,
     SequenceIndexer,
@@ -137,6 +138,7 @@ class ExtensionArray:
     view
     _concat_same_type
     _formatter
+    _format_array
     _from_factorized
     _from_sequence
     _from_sequence_of_strings
@@ -167,6 +169,8 @@ class ExtensionArray:
 
     * __repr__ : A default repr for the ExtensionArray.
     * _formatter : Print scalars inside a Series or DataFrame.
+    * _format_array : Full control over formatting an ExtensionArray
+      to be included in a Series or DataFrame.
 
     Some methods require casting the ExtensionArray to an ndarray of Python
     objects with ``self.astype(object)``, which may be expensive. When
@@ -1232,6 +1236,105 @@ def _repr_2d(self) -> str:
         class_name = f"<{type(self).__name__}>"
         return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
 
+    def _format_array(
+        self,
+        formatter: Callable | None,
+        *,
+        float_format: FloatFormatType,
+        na_rep: str = "NaN",
+        digits: int,
+        space: str | int,
+        justify: str = "right",
+        decimal: str = ".",
+        leading_space: bool | None = True,
+        quoting: int | None = None,
+    ) -> list[str]:
+        """
+        Format an array of values.
+
+        This is called from both the Series and DataFrame reprs. By default,
+        the ExtensionArray is converted to a NumPy array and formatted using
+        pandas' normal formatting methods.
+
+        .. versionadded:: 1.4.0
+
+        Parameters
+        ----------
+        formatter : Callable, optional
+            The function to apply to each element of the array to convert it
+            to a string. By default, `self._formatter` is used.
+        float_format : one-parameter function or None
+            Formatter function to apply to columns' elements if they are
+            floats. This function must return a unicode string and will be
+            applied only to the non-``NaN`` elements, with ``NaN`` being
+            handled by ``na_rep``.
+        na_rep : str, optional, default 'NaN'
+            String representation of ``NaN`` to use.
+        digits : int
+            Display precision in terms of decimal places. ``format_array``
+            fills in ``pandas.options.display.precision`` when given None.
+        space : str or int
+            Resolved from ``display.column_space`` by ``format_array`` if None.
+        justify : str, default 'right'
+            How to justify the column labels. If None uses the option from
+            the print configuration (controlled by set_option), 'right' out
+            of the box. Valid values are
+
+            * left
+            * right
+            * center
+            * justify
+            * justify-all
+            * start
+            * end
+            * inherit
+            * match-parent
+            * initial
+            * unset.
+
+        decimal : str, default '.'
+            Character recognized as decimal separator, e.g. ',' in Europe.
+
+        leading_space : bool, optional, default True
+            Whether the array should be formatted with a leading space.
+            When an array is a column of a Series or DataFrame, we do want
+            the leading space to pad between columns. When formatting an
+            Index subclass (e.g. IntervalIndex._format_native_types), we
+            don't want the leading space since it should be left-aligned.
+        quoting : int, optional
+            Passed through unchanged to ``format_array``.
+
+        Returns
+        -------
+        list[str]
+            The list of formatted values for the array.
+        """
+        from pandas.core.construction import extract_array
+
+        from pandas.io.formats.format import format_array
+
+        values = extract_array(self, extract_numpy=True)
+
+        if formatter is None:
+            # error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has
+            # no attribute "_formatter"
+            formatter = values._formatter(boxed=True)  # type: ignore[union-attr]
+
+        array = np.asarray(values)
+        fmt_values = format_array(
+            array,
+            formatter,
+            float_format=float_format,
+            na_rep=na_rep,
+            digits=digits,
+            space=space,
+            justify=justify,
+            decimal=decimal,
+            leading_space=leading_space,
+            quoting=quoting,
+        )
+        return fmt_values
+
     def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
         """
         Formatting function for scalar values.

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -6,6 +6,7 @@
 from shutil import get_terminal_size
 from typing import (
     TYPE_CHECKING,
+    Callable,
     Hashable,
     Sequence,
     TypeVar,
@@ -35,6 +36,7 @@
     ArrayLike,
     AstypeArg,
     Dtype,
+    FloatFormatType,
     NpDtype,
     Ordered,
     Shape,
@@ -1950,6 +1952,36 @@ def __contains__(self, key) -> bool:
     # ------------------------------------------------------------------
     # Rendering Methods
 
+    def _format_array(
+        self,
+        formatter: Callable | None,
+        *,
+        float_format: FloatFormatType,
+        na_rep: str = "NaN",
+        digits: int,
+        space: str | int,
+        justify: str = "right",
+        decimal: str = ".",
+        leading_space: bool | None = True,
+        quoting: int | None = None,
+    ) -> list[str]:
+        from pandas.io.formats.format import format_array
+
+        array = self._internal_get_values()
+        fmt_values = format_array(
+            array,
+            formatter,
+            float_format=float_format,
+            na_rep=na_rep,
+            digits=digits,
+            space=space,
+            justify=justify,
+            decimal=decimal,
+            leading_space=leading_space,
+            quoting=quoting,
+        )
+        return fmt_values
+
     def _formatter(self, boxed: bool = False):
         # Defer to CategoricalFormatter's formatter.
         return None

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -8,6 +8,7 @@
 )
 from typing import (
     TYPE_CHECKING,
+    Callable,
     Literal,
 )
 import warnings
@@ -36,7 +37,10 @@
     to_offset,
     tzconversion,
 )
-from pandas._typing import npt
+from pandas._typing import (
+    FloatFormatType,
+    npt,
+)
 from pandas.errors import PerformanceWarning
 from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import validate_inclusive
@@ -680,6 +684,46 @@ def _format_native_types(
             self.asi8, tz=self.tz, format=fmt, na_rep=na_rep
         )
 
+    def _format_array(
+        self,
+        formatter: Callable | None,
+        *,
+        float_format: FloatFormatType,
+        na_rep: str = "NaN",
+        digits: int,
+        space: str | int,
+        justify: str = "right",
+        decimal: str = ".",
+        leading_space: bool | None = True,
+        quoting: int | None = None,
+    ) -> list[str]:
+        from pandas.io.formats.format import (
+            Datetime64Formatter,
+            Datetime64TZFormatter,
+        )
+
+        fmt_klass: type[Datetime64Formatter] | type[Datetime64TZFormatter]
+
+        if is_datetime64tz_dtype(self.dtype):
+            fmt_klass = Datetime64TZFormatter
+        else:
+            fmt_klass = Datetime64Formatter
+
+        fmt_obj = fmt_klass(
+            self,
+            digits=digits,
+            na_rep=na_rep,
+            float_format=float_format,
+            formatter=formatter,
+            space=space,
+            justify=justify,
+            decimal=decimal,
+            leading_space=leading_space,
+            quoting=quoting,
+        )
+
+        return fmt_obj.get_result()
+
     # -----------------------------------------------------------------
     # Comparison Methods
 

diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -1276,30 +1276,43 @@ def format_array(
     List[str]
     """
     fmt_klass: type[GenericArrayFormatter]
-    if is_datetime64_dtype(values.dtype):
+
+    if space is None:
+        space = get_option("display.column_space")
+
+    if float_format is None:
+        float_format = get_option("display.float_format")
+
+    if digits is None:
+        digits = get_option("display.precision")
+
+    values = extract_array(values, extract_numpy=True)
+
+    if is_extension_array_dtype(values):
+        return values._format_array(
+            formatter,
+            float_format=float_format,
+            na_rep=na_rep,
+            digits=digits,
+            space=space,
+            justify=justify,
+            decimal=decimal,
+            leading_space=leading_space,
+            quoting=quoting,
+        )
+    elif is_datetime64_dtype(values.dtype):
         fmt_klass = Datetime64Formatter
     elif is_datetime64tz_dtype(values.dtype):
         fmt_klass = Datetime64TZFormatter
     elif is_timedelta64_dtype(values.dtype):
         fmt_klass = Timedelta64Formatter
-    elif is_extension_array_dtype(values.dtype):
-        fmt_klass = ExtensionArrayFormatter
     elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype):
         fmt_klass = FloatArrayFormatter
     elif is_integer_dtype(values.dtype):
         fmt_klass = IntArrayFormatter
     else:
         fmt_klass = GenericArrayFormatter
 
-    if space is None:
-        space = get_option("display.column_space")
-
-    if float_format is None:
-        float_format = get_option("display.float_format")
-
-    if digits is None:
-        digits = get_option("display.precision")
-
     fmt_obj = fmt_klass(
         values,
         digits=digits,
@@ -1633,37 +1646,6 @@ def _format_strings(self) -> list[str]:
         return fmt_values.tolist()
 
 
-class ExtensionArrayFormatter(GenericArrayFormatter):
-    def _format_strings(self) -> list[str]:
-        values = extract_array(self.values, extract_numpy=True)
-
-        formatter = self.formatter
-        if formatter is None:
-            # error: Item "ndarray" of "Union[Any, Union[ExtensionArray, ndarray]]" has
-            # no attribute "_formatter"
-            formatter = values._formatter(boxed=True)  # type: ignore[union-attr]
-
-        if isinstance(values, Categorical):
-            # Categorical is special for now, so that we can preserve tzinfo
-            array = values._internal_get_values()
-        else:
-            array = np.asarray(values)
-
-        fmt_values = format_array(
-            array,
-            formatter,
-            float_format=self.float_format,
-            na_rep=self.na_rep,
-            digits=self.digits,
-            space=self.space,
-            justify=self.justify,
-            decimal=self.decimal,
-            leading_space=self.leading_space,
-            quoting=self.quoting,
-        )
-        return fmt_values
-
-
 def format_percentiles(
     percentiles: (np.ndarray | list[int | float] | list[float] | list[str | float]),
 ) -> list[str]: