diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 803d1c914c954..8c85c4e961d99 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -206,8 +206,8 @@ format as fmt, ) from pandas.io.formats.info import ( - BaseInfo, DataFrameInfo, + frame_sub_kwargs, ) import pandas.plotting @@ -3138,122 +3138,7 @@ def to_xml( return xml_formatter.write_output() # ---------------------------------------------------------------------- - @Substitution( - klass="DataFrame", - type_sub=" and columns", - max_cols_sub=dedent( - """\ - max_cols : int, optional - When to switch from the verbose to the truncated output. If the - DataFrame has more than `max_cols` columns, the truncated output - is used. By default, the setting in - ``pandas.options.display.max_info_columns`` is used.""" - ), - show_counts_sub=dedent( - """\ - show_counts : bool, optional - Whether to show the non-null counts. By default, this is shown - only if the DataFrame is smaller than - ``pandas.options.display.max_info_rows`` and - ``pandas.options.display.max_info_columns``. A value of True always - shows the counts, and False never shows the counts. - null_counts : bool, optional - .. deprecated:: 1.2.0 - Use show_counts instead.""" - ), - examples_sub=dedent( - """\ - >>> int_values = [1, 2, 3, 4, 5] - >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] - >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] - >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, - ... 
"float_col": float_values}) - >>> df - int_col text_col float_col - 0 1 alpha 0.00 - 1 2 beta 0.25 - 2 3 gamma 0.50 - 3 4 delta 0.75 - 4 5 epsilon 1.00 - - Prints information of all columns: - - >>> df.info(verbose=True) - - RangeIndex: 5 entries, 0 to 4 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 int_col 5 non-null int64 - 1 text_col 5 non-null object - 2 float_col 5 non-null float64 - dtypes: float64(1), int64(1), object(1) - memory usage: 248.0+ bytes - - Prints a summary of columns count and its dtypes but not per column - information: - - >>> df.info(verbose=False) - - RangeIndex: 5 entries, 0 to 4 - Columns: 3 entries, int_col to float_col - dtypes: float64(1), int64(1), object(1) - memory usage: 248.0+ bytes - - Pipe output of DataFrame.info to buffer instead of sys.stdout, get - buffer content and writes to a text file: - - >>> import io - >>> buffer = io.StringIO() - >>> df.info(buf=buffer) - >>> s = buffer.getvalue() - >>> with open("df_info.txt", "w", - ... encoding="utf-8") as f: # doctest: +SKIP - ... f.write(s) - 260 - - The `memory_usage` parameter allows deep introspection mode, specially - useful for big DataFrames and fine-tune memory optimization: - - >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) - >>> df = pd.DataFrame({ - ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), - ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), - ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) - ... 
}) - >>> df.info() - - RangeIndex: 1000000 entries, 0 to 999999 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 column_1 1000000 non-null object - 1 column_2 1000000 non-null object - 2 column_3 1000000 non-null object - dtypes: object(3) - memory usage: 22.9+ MB - - >>> df.info(memory_usage='deep') - - RangeIndex: 1000000 entries, 0 to 999999 - Data columns (total 3 columns): - # Column Non-Null Count Dtype - --- ------ -------------- ----- - 0 column_1 1000000 non-null object - 1 column_2 1000000 non-null object - 2 column_3 1000000 non-null object - dtypes: object(3) - memory usage: 165.9 MB""" - ), - see_also_sub=dedent( - """\ - DataFrame.describe: Generate descriptive statistics of DataFrame - columns. - DataFrame.memory_usage: Memory usage of DataFrame columns.""" - ), - version_added_sub="", - ) - @doc(BaseInfo.render) + @doc(DataFrameInfo.render, **frame_sub_kwargs) def info( self, verbose: bool | None = None, diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index ddd2420731028..9340d020cd6ce 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -5,6 +5,7 @@ abstractmethod, ) import sys +from textwrap import dedent from typing import ( TYPE_CHECKING, Iterable, @@ -19,6 +20,7 @@ Dtype, WriteBuffer, ) +from pandas.util._decorators import doc from pandas.core.indexes.api import Index @@ -32,6 +34,186 @@ ) +frame_max_cols_sub = dedent( + """\ + max_cols : int, optional + When to switch from the verbose to the truncated output. If the + DataFrame has more than `max_cols` columns, the truncated output + is used. By default, the setting in + ``pandas.options.display.max_info_columns`` is used.""" +) + + +show_counts_sub = dedent( + """\ + show_counts : bool, optional + Whether to show the non-null counts. By default, this is shown + only if the DataFrame is smaller than + ``pandas.options.display.max_info_rows`` and + ``pandas.options.display.max_info_columns``. 
A value of True always + shows the counts, and False never shows the counts. + null_counts : bool, optional + .. deprecated:: 1.2.0 + Use show_counts instead.""" +) + + +frame_examples_sub = dedent( + """\ + >>> int_values = [1, 2, 3, 4, 5] + >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] + >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] + >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, + ... "float_col": float_values}) + >>> df + int_col text_col float_col + 0 1 alpha 0.00 + 1 2 beta 0.25 + 2 3 gamma 0.50 + 3 4 delta 0.75 + 4 5 epsilon 1.00 + + Prints information of all columns: + + >>> df.info(verbose=True) + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Prints a summary of columns count and its dtypes but not per column + information: + + >>> df.info(verbose=False) + + RangeIndex: 5 entries, 0 to 4 + Columns: 3 entries, int_col to float_col + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Pipe output of DataFrame.info to buffer instead of sys.stdout, get + buffer content and writes to a text file: + + >>> import io + >>> buffer = io.StringIO() + >>> df.info(buf=buffer) + >>> s = buffer.getvalue() + >>> with open("df_info.txt", "w", + ... encoding="utf-8") as f: # doctest: +SKIP + ... f.write(s) + 260 + + The `memory_usage` parameter allows deep introspection mode, specially + useful for big DataFrames and fine-tune memory optimization: + + >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) + >>> df = pd.DataFrame({ + ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) + ... 
}) + >>> df.info() + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 22.9+ MB + + >>> df.info(memory_usage='deep') + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 165.9 MB""" +) + + +frame_see_also_sub = dedent( + """\ + DataFrame.describe: Generate descriptive statistics of DataFrame + columns. + DataFrame.memory_usage: Memory usage of DataFrame columns.""" +) + + +frame_sub_kwargs = { + "klass": "DataFrame", + "type_sub": " and columns", + "max_cols_sub": frame_max_cols_sub, + "show_counts_sub": show_counts_sub, + "examples_sub": frame_examples_sub, + "see_also_sub": frame_see_also_sub, + "version_added_sub": "", +} + + +INFO_DOCSTRING = dedent( + """ + Print a concise summary of a {klass}. + + This method prints information about a {klass} including + the index dtype{type_sub}, non-null values and memory usage. + {version_added_sub}\ + + Parameters + ---------- + data : {klass} + {klass} to print information about. + verbose : bool, optional + Whether to print the full summary. By default, the setting in + ``pandas.options.display.max_info_columns`` is followed. + buf : writable buffer, defaults to sys.stdout + Where to send the output. By default, the output is printed to + sys.stdout. Pass a writable buffer if you need to further process + the output. + {max_cols_sub} + memory_usage : bool, str, optional + Specifies whether total memory usage of the {klass} + elements (including the index) should be displayed. 
By default, + this follows the ``pandas.options.display.memory_usage`` setting. + + True always shows memory usage. False never shows memory usage. + A value of 'deep' is equivalent to "True with deep introspection". + Memory usage is shown in human-readable units (base-2 + representation). Without deep introspection a memory estimation is + made based on column dtype and number of rows assuming values + consume the same memory amount for corresponding dtypes. With deep + memory introspection, a real memory usage calculation is performed + at the cost of computational resources. + {show_counts_sub} + + Returns + ------- + None + This method prints a summary of a {klass} and returns None. + + See Also + -------- + {see_also_sub} + + Examples + -------- + {examples_sub} + """ +) + + def _put_str(s: str | Dtype, space: int) -> str: """ Make string of specified length, padding to the right if necessary. @@ -178,53 +360,7 @@ def render( verbose: bool | None, show_counts: bool | None, ) -> None: - """ - Print a concise summary of a %(klass)s. - - This method prints information about a %(klass)s including - the index dtype%(type_sub)s, non-null values and memory usage. - %(version_added_sub)s\ - - Parameters - ---------- - data : %(klass)s - %(klass)s to print information about. - verbose : bool, optional - Whether to print the full summary. By default, the setting in - ``pandas.options.display.max_info_columns`` is followed. - buf : writable buffer, defaults to sys.stdout - Where to send the output. By default, the output is printed to - sys.stdout. Pass a writable buffer if you need to further process - the output. - %(max_cols_sub)s - memory_usage : bool, str, optional - Specifies whether total memory usage of the %(klass)s - elements (including the index) should be displayed. By default, - this follows the ``pandas.options.display.memory_usage`` setting. - - True always show memory usage. False never shows memory usage.
- A value of 'deep' is equivalent to "True with deep introspection". - Memory usage is shown in human-readable units (base-2 - representation). Without deep introspection a memory estimation is - made based in column dtype and number of rows assuming values - consume the same memory amount for corresponding dtypes. With deep - memory introspection, a real memory usage calculation is performed - at the cost of computational resources. - %(show_counts_sub)s - - Returns - ------- - None - This method prints a summary of a %(klass)s and returns None. - - See Also - -------- - %(see_also_sub)s - - Examples - -------- - %(examples_sub)s - """ + pass class DataFrameInfo(BaseInfo): @@ -286,6 +422,16 @@ def memory_usage_bytes(self) -> int: deep = False return self.data.memory_usage(index=True, deep=deep).sum() + @doc( + INFO_DOCSTRING, + klass="DataFrame", + type_sub=" and columns", + max_cols_sub=frame_max_cols_sub, + show_counts_sub=show_counts_sub, + examples_sub=frame_examples_sub, + see_also_sub=frame_see_also_sub, + version_added_sub="", + ) def render( self, *,