diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 0d2254e401103..6eb61f14d5629 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1027,6 +1027,7 @@ I/O - :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) - Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`) - Bug in :meth:`ujson.encode` was raising an `OverflowError` with numbers larger than sys.maxsize (:issue: `34395`) +- Bug in :meth:`HDFStore.append_to_multiple` was raising a ``ValueError`` when the ``min_itemsize`` parameter is set (:issue:`11238`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 800e9474cc0f8..0e5d7b007bd89 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1303,6 +1303,8 @@ def append_to_multiple( valid_index = valid_index.intersection(index) value = value.loc[valid_index] + min_itemsize = kwargs.pop("min_itemsize", None) + # append for k, v in d.items(): dc = data_columns if k == selector else None @@ -1310,7 +1312,12 @@ def append_to_multiple( # compute the val val = value.reindex(v, axis=axis) - self.append(k, val, data_columns=dc, **kwargs) + filtered = ( + {key: value for (key, value) in min_itemsize.items() if key in v} + if min_itemsize is not None + else None + ) + self.append(k, val, data_columns=dc, min_itemsize=filtered, **kwargs) def create_table_index( self, diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index 524e9f41a7731..c69992471fc9b 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -3697,6 +3697,33 @@ def test_append_to_multiple_dropna_false(self, setup_path): assert not store.select("df1a").index.equals(store.select("df2a").index) + def test_append_to_multiple_min_itemsize(self, setup_path): + # GH 11238 + df = pd.DataFrame( + { + "IX": np.arange(1, 21), + 
"Num": np.arange(1, 21), + "BigNum": np.arange(1, 21) * 88, + "Str": ["a" for _ in range(20)], + "LongStr": ["abcde" for _ in range(20)], + } + ) + expected = df.iloc[[0]] + + with ensure_clean_store(setup_path) as store: + store.append_to_multiple( + { + "index": ["IX"], + "nums": ["Num", "BigNum"], + "strs": ["Str", "LongStr"], + }, + df.iloc[[0]], + "index", + min_itemsize={"Str": 10, "LongStr": 100, "Num": 2}, + ) + result = store.select_as_multiple(["index", "nums", "strs"]) + tm.assert_frame_equal(result, expected) + def test_select_as_multiple(self, setup_path): df1 = tm.makeTimeDataFrame()