From 3f079083796e7323dd7951fec94ab57274d9da3a Mon Sep 17 00:00:00 2001
From: Sven <sven.schellenberg@paradynsystems.com>
Date: Tue, 5 Dec 2017 13:47:42 +1100
Subject: [PATCH 1/8] Fixed #18413, but test case not passing * Handle all-NaN
 columns differently when building metadata for categorical axes on saving
 hdf5 file * Categorical axes fail test case comparison due to type difference
 (even though there isn't a visible type difference)

---
 pandas/io/pytables.py            | 15 +++++++++++----
 pandas/tests/io/test_pytables.py | 19 +++++++++++++++++++
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 2a66aea88f6d9..2e7dd7bad4ee3 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2137,10 +2137,17 @@ def convert(self, values, nan_rep, encoding):
                 # if we have stored a NaN in the categories
                 # then strip it; in theory we could have BOTH
                 # -1s in the codes and nulls :<
-                mask = isna(categories)
-                if mask.any():
-                    categories = categories[~mask]
-                    codes[codes != -1] -= mask.astype(int).cumsum().values
+                if categories is None:
+                    # Handle case of NaN-only categorical columns in which case
+                    # the categories are an empty array; when this is stored,
+                    # pytables cannot write a zero-len array, so on readback
+                    # the categories would be None and `read_hdf()` would fail.
+                    categories = []
+                else:
+                    mask = isna(categories)
+                    if mask.any():
+                        categories = categories[~mask]
+                        codes[codes != -1] -= mask.astype(int).cumsum().values                
 
                 self.data = Categorical.from_codes(codes,
                                                    categories=categories,
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index 5e5fc6e7eac62..868ea243b1d08 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -4927,6 +4927,25 @@ def test_categorical_conversion(self):
             result = read_hdf(path, 'df', where='obsids=B')
             tm.assert_frame_equal(result, expected)
 
+    def test_categorical_nan_only_columns(self):
+        # GH18413
+        # Check that read_hdf with categorical columns with NaN-only values can
+        # be read back.
+        df = pd.DataFrame({
+            'a': ['a', 'b', 'c', np.nan],
+            'b': [np.nan, np.nan, np.nan, np.nan],
+            'c': [1, 2, 3, 4]
+        })
+        df['a'] = df.a.astype('category')
+        df['b'] = df.b.astype('category')
+        expected = df.copy()
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df', format='table', data_columns=True)
+            result = read_hdf(path, 'df')
+            print 'result', result.dtypes
+            print 'expected', expected.dtypes
+            tm.assert_frame_equal(result, expected)
+
     def test_duplicate_column_name(self):
         df = DataFrame(columns=["a", "a"], data=[[0, 0]])
 

From 7a4f93a093ee7573eed5ced4f64e747d65e15bae Mon Sep 17 00:00:00 2001
From: Sven <mail4sven@gmx.net>
Date: Wed, 6 Dec 2017 11:40:20 +1100
Subject: [PATCH 2/8] Fixed test case (#18413) * Change empty category to
 `Index([], dtype=np.float64)` instead of `[]`. * Remove printouts in test
 case.

---
 pandas/io/pytables.py            | 2 +-
 pandas/tests/io/test_pytables.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 2e7dd7bad4ee3..568540389958b 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2142,7 +2142,7 @@ def convert(self, values, nan_rep, encoding):
                     # the categories are an empty array; when this is stored,
                     # pytables cannot write a zero-len array, so on readback
                     # the categories would be None and `read_hdf()` would fail.
-                    categories = []
+                    categories = Index([], dtype=np.float64)
                 else:
                     mask = isna(categories)
                     if mask.any():
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index 868ea243b1d08..d70966af176d1 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -4942,8 +4942,6 @@ def test_categorical_nan_only_columns(self):
         with ensure_clean_path(self.path) as path:
             df.to_hdf(path, 'df', format='table', data_columns=True)
             result = read_hdf(path, 'df')
-            print 'result', result.dtypes
-            print 'expected', expected.dtypes
             tm.assert_frame_equal(result, expected)
 
     def test_duplicate_column_name(self):

From 8540a4afd71edaf5a3ff1bf7653037d6a60c8c12 Mon Sep 17 00:00:00 2001
From: Sven <mail4sven@gmx.net>
Date: Wed, 6 Dec 2017 11:48:06 +1100
Subject: [PATCH 3/8] Removed trailing whitespace

---
 pandas/io/pytables.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 568540389958b..97b57fb9d5e43 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2147,7 +2147,7 @@ def convert(self, values, nan_rep, encoding):
                     mask = isna(categories)
                     if mask.any():
                         categories = categories[~mask]
-                        codes[codes != -1] -= mask.astype(int).cumsum().values                
+                        codes[codes != -1] -= mask.astype(int).cumsum().values
 
                 self.data = Categorical.from_codes(codes,
                                                    categories=categories,

From 928c258edaa06c2b94e7e8132aab5fe757a73baf Mon Sep 17 00:00:00 2001
From: Sven <mail4sven@gmx.net>
Date: Wed, 6 Dec 2017 12:39:05 +1100
Subject: [PATCH 4/8] Removed unnecessary dataframe copy

---
 pandas/tests/io/test_pytables.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index d70966af176d1..b00464240db36 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -4938,7 +4938,7 @@ def test_categorical_nan_only_columns(self):
         })
         df['a'] = df.a.astype('category')
         df['b'] = df.b.astype('category')
-        expected = df.copy()
+        expected = df
         with ensure_clean_path(self.path) as path:
             df.to_hdf(path, 'df', format='table', data_columns=True)
             result = read_hdf(path, 'df')

From b3766f7e49979b4df2204cdf257a8295bd5b854f Mon Sep 17 00:00:00 2001
From: Sven <mail4sven@gmx.net>
Date: Wed, 6 Dec 2017 12:44:23 +1100
Subject: [PATCH 5/8] Update whatsnew section

---
 doc/source/whatsnew/v0.22.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 52ca05d9a76a9..af50ebf79649f 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -157,7 +157,7 @@ I/O
 - Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`)
 - Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`)
 - Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`)
--
+- Bug when storing NaN-only categorical columns in hdf5 store (:issue:`18413`)
 -
 
 Plotting

From 52dc141109005b7458fdff8c1699ae00ea0924fa Mon Sep 17 00:00:00 2001
From: Sven <mail4sven@gmx.net>
Date: Wed, 6 Dec 2017 13:07:29 +1100
Subject: [PATCH 6/8] Moved whatsnew entry to 0.21.1

---
 doc/source/whatsnew/v0.21.1.txt | 1 +
 doc/source/whatsnew/v0.22.0.txt | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt
index e307e605687bf..881babcf2c243 100644
--- a/doc/source/whatsnew/v0.21.1.txt
+++ b/doc/source/whatsnew/v0.21.1.txt
@@ -89,6 +89,7 @@ I/O
 - Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`).
 - Bug in :meth:`DataFrame.to_msgpack` when serializing data of the numpy.bool_ datatype (:issue:`18390`)
 - Bug in :func:`read_json` not decoding when reading line deliminted JSON from S3 (:issue:`17200`)
+- Bug when storing NaN-only categorical columns in hdf5 store (:issue:`18413`)
 
 Plotting
 ^^^^^^^^
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index af50ebf79649f..52ca05d9a76a9 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -157,7 +157,7 @@ I/O
 - Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`)
 - Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`)
 - Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`)
-- Bug when storing NaN-only categorical columns in hdf5 store (:issue:`18413`)
+-
 -
 
 Plotting

From e6aad40a35c1adfe87af5f96a57a92d23b4b2624 Mon Sep 17 00:00:00 2001
From: Sven <mail4sven@gmx.net>
Date: Fri, 8 Dec 2017 21:08:11 +1100
Subject: [PATCH 7/8] Addressed requested changes * Added additional all-None
 Series * Provided more detail in whatsnew description

---
 .gitignore                       | 1 +
 doc/source/whatsnew/v0.21.1.txt  | 1 +
 pandas/tests/io/test_pytables.py | 4 +++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index ff0a6aef47163..00004f262e84c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -106,3 +106,4 @@ doc/build/html/index.html
 doc/tmp.sv
 doc/source/styled.xlsx
 doc/source/templates/
+env/
\ No newline at end of file
diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt
index a80de0a560fac..3b283df9302e8 100644
--- a/doc/source/whatsnew/v0.21.1.txt
+++ b/doc/source/whatsnew/v0.21.1.txt
@@ -92,6 +92,7 @@ I/O
 - Bug in :func:`read_json` not decoding when reading line deliminted JSON from S3 (:issue:`17200`)
 - Bug in :func:`pandas.io.json.json_normalize` to avoid modification of ``meta`` (:issue:`18610`)
 - Bug when storing NaN-only categorical columns in hdf5 store (:issue:`18413`)
+- Bug when reading NaN-only categorical columns in :class:`HDFStore` (:issue:`18413`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index a3442421761d0..d69f3cb293ad8 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -4935,10 +4935,12 @@ def test_categorical_nan_only_columns(self):
         df = pd.DataFrame({
             'a': ['a', 'b', 'c', np.nan],
             'b': [np.nan, np.nan, np.nan, np.nan],
-            'c': [1, 2, 3, 4]
+            'c': [1, 2, 3, 4],
+            'd': pd.Series([None]* 4, dtype=object)
         })
         df['a'] = df.a.astype('category')
         df['b'] = df.b.astype('category')
+        df['d'] = df.b.astype('category')
         expected = df
         with ensure_clean_path(self.path) as path:
             df.to_hdf(path, 'df', format='table', data_columns=True)

From b2ac7c4d534975eeac49be0ddbfa7e8a3eb89602 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 10 Dec 2017 10:54:25 -0500
Subject: [PATCH 8/8] lint fixes

---
 .gitignore                       | 2 +-
 pandas/tests/io/test_pytables.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 00004f262e84c..b1748ae72b8ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -106,4 +106,4 @@ doc/build/html/index.html
 doc/tmp.sv
 doc/source/styled.xlsx
 doc/source/templates/
-env/
\ No newline at end of file
+env/
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index 54dc41e390b31..85f24e794f12a 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -4936,7 +4936,7 @@ def test_categorical_nan_only_columns(self):
             'a': ['a', 'b', 'c', np.nan],
             'b': [np.nan, np.nan, np.nan, np.nan],
             'c': [1, 2, 3, 4],
-            'd': pd.Series([None]* 4, dtype=object)
+            'd': pd.Series([None] * 4, dtype=object)
         })
         df['a'] = df.a.astype('category')
         df['b'] = df.b.astype('category')