From 3dff081a7656c7d882d18e5d5648523c0110302a Mon Sep 17 00:00:00 2001
From: "SiYoungOh(ohahohah)" <ohahohah.dev@gmail.com>
Date: Sat, 10 Mar 2018 17:46:50 +0900
Subject: [PATCH 1/6] DOC: Improved the docstring of
 pd.DataFrame.memory_usage/empty

---
 pandas/core/frame.py   | 40 ++++++++++++++++++++++++++++++++++++++--
 pandas/core/generic.py | 12 ++++++++++--
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a66d00fff9714..fc95e4f604461 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1943,7 +1943,11 @@ def _sizeof_fmt(num, size_qualifier):
         _put_lines(buf, lines)
 
     def memory_usage(self, index=True, deep=False):
-        """Memory usage of DataFrame columns.
+        """
+        Memory usage of DataFrame columns.
+
+        Memory usage of DataFrame is accessing pandas.DataFrame.info method.
+        A configuration option, `display.memory_usage` (see Parameters)
 
         Parameters
         ----------
@@ -1953,7 +1957,7 @@ def memory_usage(self, index=True, deep=False):
             the first index of the Series is `Index`.
         deep : bool
             Introspect the data deeply, interrogate
-            `object` dtypes for system-level memory consumption
+            `object` dtypes for system-level memory consumption.
 
         Returns
         -------
@@ -1969,6 +1973,38 @@ def memory_usage(self, index=True, deep=False):
         See Also
         --------
         numpy.ndarray.nbytes
+
+        Examples
+        --------
+        >>> dtypes = ['int64', 'float64', 'complex128', 'object', 'bool']
+        >>> data = dict([(t, np.random.randint(100, size=5000).astype(t))
+        ...              for t in dtypes])
+        >>> df = pd.DataFrame(data)
+        >>> df.memory_usage()
+        Index            80
+        int64         40000
+        float64       40000
+        complex128    80000
+        object        40000
+        bool           5000
+        dtype: int64
+        >>> df.memory_usage(index=False)
+        int64         40000
+        float64       40000
+        complex128    80000
+        object        40000
+        bool           5000
+        dtype: int64
+        >>> df.memory_usage(index=True)
+        Index            80
+        int64         40000
+        float64       40000
+        complex128    80000
+        object        40000
+        bool           5000
+        dtype: int64
+        >>> df.memory_usage(index=True).sum()
+        205080
         """
         result = Series([c.memory_usage(index=False, deep=deep)
                          for col, c in self.iteritems()], index=self.columns)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index a893b2ba1a189..34835dc69bec0 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1436,12 +1436,20 @@ def __contains__(self, key):
 
     @property
     def empty(self):
-        """True if NDFrame is entirely empty [no items], meaning any of the
+        """
+        True if DataFrame is empty.
+
+        True if DataFrame is entirely empty [no items], meaning any of the
         axes are of length 0.
 
+        Returns
+        -------
+        empty : boolean
+            if DataFrame is empty, return true, if not return false.
+
         Notes
         -----
-        If NDFrame contains only NaNs, it is still not considered empty. See
+        If DataFrame contains only NaNs, it is still not considered empty. See
         the example below.
 
         Examples

From fc5b498ba8d2ad211f64e993e74c1901b09a3d53 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 15 Mar 2018 14:56:16 -0500
Subject: [PATCH 2/6] Updates [ci skip]

* Consistent with Series.memory_usage
* Added Categorical notes

[ci skip]
---
 pandas/core/frame.py | 64 +++++++++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 19 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index fc95e4f604461..82e6847150344 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1944,42 +1944,52 @@ def _sizeof_fmt(num, size_qualifier):
 
     def memory_usage(self, index=True, deep=False):
         """
-        Memory usage of DataFrame columns.
+        Return the memory usage of each column in bytes.
+
+        The memory usage can optionally include the contribution of
+        the index and elements of `object` dtype.
 
-        Memory usage of DataFrame is accessing pandas.DataFrame.info method.
         A configuration option, `display.memory_usage` (see Parameters)
 
         Parameters
         ----------
-        index : bool
-            Specifies whether to include memory usage of DataFrame's
-            index in returned Series. If `index=True` (default is False)
-            the first index of the Series is `Index`.
+        index : bool, default False
+            Specifies whether to include the memory usage of the DataFrame's
+            index in returned Series. If ``index=True`` the memory usage of the
+            index the first item in the output.
         deep : bool
-            Introspect the data deeply, interrogate
-            `object` dtypes for system-level memory consumption.
+            If True, introspect the data deeply by interrogating
+            `object` dtypes for system-level memory consumption, and include
+            it in the returned values.
 
         Returns
         -------
         sizes : Series
-            A series with column names as index and memory usage of
-            columns with units of bytes.
-
-        Notes
-        -----
-        Memory usage does not include memory consumed by elements that
-        are not components of the array if deep=False
+            A Series whose index is the original column names and whose values
+            are the memory usage of each column in bytes.
 
         See Also
         --------
-        numpy.ndarray.nbytes
+        numpy.ndarray.nbytes : Total bytes consumed by the elements of an
+            ndarray.
+        Series.memory_usage : Bytes consumed by a Series.
+        pandas.Categorical : Memory-efficient array for string values with
+            many repeated values.
 
         Examples
         --------
         >>> dtypes = ['int64', 'float64', 'complex128', 'object', 'bool']
-        >>> data = dict([(t, np.random.randint(100, size=5000).astype(t))
+        >>> data = dict([(t, np.ones(shape=5000).astype(t))
         ...              for t in dtypes])
         >>> df = pd.DataFrame(data)
+        >>> df.head()
+           int64  float64  complex128 object  bool
+        0      1      1.0      (1+0j)      1  True
+        1      1      1.0      (1+0j)      1  True
+        2      1      1.0      (1+0j)      1  True
+        3      1      1.0      (1+0j)      1  True
+        4      1      1.0      (1+0j)      1  True
+
         >>> df.memory_usage()
         Index            80
         int64         40000
@@ -1988,6 +1998,7 @@ def memory_usage(self, index=True, deep=False):
         object        40000
         bool           5000
         dtype: int64
+
         >>> df.memory_usage(index=False)
         int64         40000
         float64       40000
@@ -1995,6 +2006,7 @@ def memory_usage(self, index=True, deep=False):
         object        40000
         bool           5000
         dtype: int64
+
         >>> df.memory_usage(index=True)
         Index            80
         int64         40000
@@ -2003,8 +2015,22 @@ def memory_usage(self, index=True, deep=False):
         object        40000
         bool           5000
         dtype: int64
-        >>> df.memory_usage(index=True).sum()
-        205080
+
+        The memory footprint of `object` dtype columns is ignored by default:
+        >>> df.memory_usage(deep=True)
+        Index             80
+        int64          40000
+        float64        40000
+        complex128     80000
+        object        160000
+        bool            5000
+        dtype: int64
+
+        Use a Categorical for efficient storage of an object-dtype column with
+        many repeated values.
+
+        >>> df['object'].astype('category').memory_usage(deep=True)
+        5168
         """
         result = Series([c.memory_usage(index=False, deep=deep)
                          for col, c in self.iteritems()], index=self.columns)

From b033dc6950f1cd20578d2bce71c5d24431645edb Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 15 Mar 2018 21:21:15 +0100
Subject: [PATCH 3/6] fix wrong default

---
 pandas/core/frame.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 82e6847150344..ece0e4819566e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1953,11 +1953,11 @@ def memory_usage(self, index=True, deep=False):
 
         Parameters
         ----------
-        index : bool, default False
+        index : bool, default True
             Specifies whether to include the memory usage of the DataFrame's
             index in returned Series. If ``index=True`` the memory usage of the
             index the first item in the output.
-        deep : bool
+        deep : bool, default False
             If True, introspect the data deeply by interrogating
             `object` dtypes for system-level memory consumption, and include
             it in the returned values.
@@ -2007,16 +2007,8 @@ def memory_usage(self, index=True, deep=False):
         bool           5000
         dtype: int64
 
-        >>> df.memory_usage(index=True)
-        Index            80
-        int64         40000
-        float64       40000
-        complex128    80000
-        object        40000
-        bool           5000
-        dtype: int64
-
         The memory footprint of `object` dtype columns is ignored by default:
+
         >>> df.memory_usage(deep=True)
         Index             80
         int64          40000

From bb7f341c71517bd8e351c314af6ff191f8ca1792 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 15 Mar 2018 21:23:39 +0100
Subject: [PATCH 4/6] Reword DataFrame.empty summary and Returns in generic.py

---
 pandas/core/generic.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 34835dc69bec0..c6a31a6e1c749 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1437,15 +1437,15 @@ def __contains__(self, key):
     @property
     def empty(self):
         """
-        True if DataFrame is empty.
+        Indicator whether DataFrame is empty.
 
-        True if DataFrame is entirely empty [no items], meaning any of the
-        axes are of length 0.
+        True if DataFrame is entirely empty (no items), meaning any of the
+        axes are of length 0. O
 
         Returns
         -------
-        empty : boolean
-            if DataFrame is empty, return true, if not return false.
+        bool
+            If DataFrame is empty, return True; if not, return False.
 
         Notes
         -----

From 1585a0e09f6621a9e81be260269c5046a316db59 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 15 Mar 2018 21:24:30 +0100
Subject: [PATCH 5/6] Remove stray character from empty docstring in generic.py

---
 pandas/core/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c6a31a6e1c749..4a2698290166f 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1440,7 +1440,7 @@ def empty(self):
         Indicator whether DataFrame is empty.
 
         True if DataFrame is entirely empty (no items), meaning any of the
-        axes are of length 0. O
+        axes are of length 0.
 
         Returns
         -------

From d4cc71d94be50e796863ad754fdd6220ffa56401 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 15 Mar 2018 15:49:43 -0500
Subject: [PATCH 6/6] Mention DataFrame.info in memory_usage docstring [ci skip]

[ci skip]
---
 pandas/core/frame.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ece0e4819566e..af3d5a0f93cce 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1949,7 +1949,8 @@ def memory_usage(self, index=True, deep=False):
         The memory usage can optionally include the contribution of
         the index and elements of `object` dtype.
 
-        A configuration option, `display.memory_usage` (see Parameters)
+        This value is displayed in `DataFrame.info` by default. This can be
+        suppressed by setting ``pandas.options.display.memory_usage`` to False.
 
         Parameters
         ----------
@@ -1975,6 +1976,7 @@ def memory_usage(self, index=True, deep=False):
         Series.memory_usage : Bytes consumed by a Series.
         pandas.Categorical : Memory-efficient array for string values with
             many repeated values.
+        DataFrame.info : Concise summary of a DataFrame.
 
         Examples
         --------