-
-
Notifications
You must be signed in to change notification settings - Fork 18.9k
DOC: move info docs to DataFrameInfo #38062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
5151e33
53a47cd
ffce3d5
b632c6c
642a308
751143c
7e21334
d326198
05efde0
a686a22
f1b3371
6041cd9
013ee3b
5814bc9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
|
||
from abc import ABC, abstractmethod | ||
import sys | ||
from textwrap import dedent | ||
from typing import ( | ||
IO, | ||
TYPE_CHECKING, | ||
|
@@ -17,6 +18,7 @@ | |
from pandas._config import get_option | ||
|
||
from pandas._typing import Dtype, FrameOrSeriesUnion | ||
from pandas.util._decorators import doc | ||
|
||
from pandas.core.indexes.api import Index | ||
|
||
|
@@ -27,6 +29,132 @@ | |
from pandas.core.frame import DataFrame | ||
|
||
|
||
frame_max_cols_sub = dedent( | ||
"""\ | ||
max_cols : int, optional | ||
When to switch from the verbose to the truncated output. If the | ||
DataFrame has more than `max_cols` columns, the truncated output | ||
is used. By default, the setting in | ||
``pandas.options.display.max_info_columns`` is used.""" | ||
) | ||
|
||
|
||
frame_null_counts_sub = dedent( | ||
"""\ | ||
null_counts : bool, optional | ||
Whether to show the non-null counts. By default, this is shown | ||
only if the DataFrame is smaller than | ||
``pandas.options.display.max_info_rows`` and | ||
``pandas.options.display.max_info_columns``. A value of True always | ||
shows the counts, and False never shows the counts.""" | ||
) | ||
|
||
|
||
frame_examples_sub = dedent( | ||
"""\ | ||
>>> int_values = [1, 2, 3, 4, 5] | ||
>>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] | ||
>>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] | ||
>>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, | ||
... "float_col": float_values}) | ||
>>> df | ||
int_col text_col float_col | ||
0 1 alpha 0.00 | ||
1 2 beta 0.25 | ||
2 3 gamma 0.50 | ||
3 4 delta 0.75 | ||
4 5 epsilon 1.00 | ||
|
||
Prints information of all columns: | ||
|
||
>>> df.info(verbose=True) | ||
<class 'pandas.core.frame.DataFrame'> | ||
RangeIndex: 5 entries, 0 to 4 | ||
Data columns (total 3 columns): | ||
# Column Non-Null Count Dtype | ||
--- ------ -------------- ----- | ||
0 int_col 5 non-null int64 | ||
1 text_col 5 non-null object | ||
2 float_col 5 non-null float64 | ||
dtypes: float64(1), int64(1), object(1) | ||
memory usage: 248.0+ bytes | ||
|
||
Prints a summary of columns count and its dtypes but not per column | ||
information: | ||
|
||
>>> df.info(verbose=False) | ||
<class 'pandas.core.frame.DataFrame'> | ||
RangeIndex: 5 entries, 0 to 4 | ||
Columns: 3 entries, int_col to float_col | ||
dtypes: float64(1), int64(1), object(1) | ||
memory usage: 248.0+ bytes | ||
|
||
Pipe output of DataFrame.info to a buffer instead of sys.stdout, get the | ||
buffer content and write it to a text file: | ||
|
||
>>> import io | ||
>>> buffer = io.StringIO() | ||
>>> df.info(buf=buffer) | ||
>>> s = buffer.getvalue() | ||
>>> with open("df_info.txt", "w", | ||
... encoding="utf-8") as f: # doctest: +SKIP | ||
... f.write(s) | ||
260 | ||
|
||
The `memory_usage` parameter allows deep introspection mode, especially | ||
useful for big DataFrames and fine-tuning memory optimization: | ||
|
||
>>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) | ||
>>> df = pd.DataFrame({ | ||
... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), | ||
... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), | ||
... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) | ||
... }) | ||
>>> df.info() | ||
<class 'pandas.core.frame.DataFrame'> | ||
RangeIndex: 1000000 entries, 0 to 999999 | ||
Data columns (total 3 columns): | ||
# Column Non-Null Count Dtype | ||
--- ------ -------------- ----- | ||
0 column_1 1000000 non-null object | ||
1 column_2 1000000 non-null object | ||
2 column_3 1000000 non-null object | ||
dtypes: object(3) | ||
memory usage: 22.9+ MB | ||
|
||
>>> df.info(memory_usage='deep') | ||
<class 'pandas.core.frame.DataFrame'> | ||
RangeIndex: 1000000 entries, 0 to 999999 | ||
Data columns (total 3 columns): | ||
# Column Non-Null Count Dtype | ||
--- ------ -------------- ----- | ||
0 column_1 1000000 non-null object | ||
1 column_2 1000000 non-null object | ||
2 column_3 1000000 non-null object | ||
dtypes: object(3) | ||
memory usage: 165.9 MB""" | ||
) | ||
|
||
|
||
frame_see_also_sub = dedent( | ||
"""\ | ||
DataFrame.describe: Generate descriptive statistics of DataFrame | ||
columns. | ||
DataFrame.memory_usage: Memory usage of DataFrame columns.""" | ||
) | ||
|
||
|
||
frame_subs = { | ||
"klass": "DataFrame", | ||
"type_sub": " and columns", | ||
"max_cols_sub": frame_max_cols_sub, | ||
"null_counts_sub": frame_null_counts_sub, | ||
"examples_sub": frame_examples_sub, | ||
"see_also_sub": frame_see_also_sub, | ||
"version_added_sub": "", | ||
} | ||
|
||
|
||
def _put_str(s: Union[str, Dtype], space: int) -> str: | ||
""" | ||
Make string of specified length, padding to the right if necessary. | ||
|
@@ -174,26 +302,26 @@ def render( | |
show_counts: Optional[bool], | ||
) -> None: | ||
""" | ||
Print a concise summary of a %(klass)s. | ||
Print a concise summary of a {klass}. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you have variables in this docstring, this There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you for the comment! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
|
||
This method prints information about a %(klass)s including | ||
the index dtype%(type_sub)s, non-null values and memory usage. | ||
%(version_added_sub)s\ | ||
This method prints information about a {klass} including | ||
the index dtype{type_sub}, non-null values and memory usage. | ||
{version_added_sub}\ | ||
|
||
Parameters | ||
---------- | ||
data : %(klass)s | ||
%(klass)s to print information about. | ||
data : {klass} | ||
{klass} to print information about. | ||
verbose : bool, optional | ||
Whether to print the full summary. By default, the setting in | ||
``pandas.options.display.max_info_columns`` is followed. | ||
buf : writable buffer, defaults to sys.stdout | ||
Where to send the output. By default, the output is printed to | ||
sys.stdout. Pass a writable buffer if you need to further process | ||
the output. | ||
%(max_cols_sub)s | ||
{max_cols_sub} | ||
memory_usage : bool, str, optional | ||
Specifies whether total memory usage of the %(klass)s | ||
Specifies whether total memory usage of the {klass} | ||
elements (including the index) should be displayed. By default, | ||
this follows the ``pandas.options.display.memory_usage`` setting. | ||
|
||
|
@@ -210,15 +338,15 @@ def render( | |
Returns | ||
------- | ||
None | ||
This method prints a summary of a %(klass)s and returns None. | ||
This method prints a summary of a {klass} and returns None. | ||
|
||
See Also | ||
-------- | ||
%(see_also_sub)s | ||
{see_also_sub} | ||
|
||
Examples | ||
-------- | ||
%(examples_sub)s | ||
{examples_sub} | ||
""" | ||
|
||
|
||
|
@@ -281,6 +409,10 @@ def memory_usage_bytes(self) -> int: | |
deep = False | ||
return self.data.memory_usage(index=True, deep=deep).sum() | ||
|
||
@doc( | ||
BaseInfo.render, | ||
**frame_subs, | ||
) | ||
def render( | ||
self, | ||
*, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you do this? This could be simply specified in the decorator, doesn't seem to add value.
So, instead of:
@doc(BaseInfo.render, **frame_subs)
You can use:
@doc(BaseInfo.render, klass='DataFrame', type_sub=' and columns', ...)