Skip to content

API: frames from HDFStore stored without an index now give RangeIndex #51076

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,7 @@ Other API changes
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
- Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
- :class:`DataFrame` objects read from a :class:`HDFStore` file without an index now have a :class:`RangeIndex` instead of an ``int64`` index (:issue:`51076`)
- Instantiating an :class:`Index` with a numeric numpy dtype with data containing :class:`NA` and/or :class:`NaT` now raises a ``ValueError``. Previously a ``TypeError`` was raised (:issue:`51050`)
- Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`)
- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
Index,
MultiIndex,
PeriodIndex,
RangeIndex,
Series,
TimedeltaIndex,
concat,
Expand Down Expand Up @@ -2258,7 +2259,7 @@ def convert(
"""
assert isinstance(values, np.ndarray), type(values)

index = Index(np.arange(len(values), dtype=np.int64))
index = RangeIndex(len(values))
return index, index

def set_attr(self) -> None:
Expand Down
32 changes: 16 additions & 16 deletions pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ def test_append(setup_path):
)
_maybe_remove(store, "uints")
store.append("uints", uint_data)
tm.assert_frame_equal(store["uints"], uint_data)
tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True)

# uints - test storage of uints in indexable columns
_maybe_remove(store, "uints")
# 64-bit indices not yet supported
store.append("uints", uint_data, data_columns=["u08", "u16", "u32"])
tm.assert_frame_equal(store["uints"], uint_data)
tm.assert_frame_equal(store["uints"], uint_data, check_index_type=True)


def test_append_series(setup_path):
Expand Down Expand Up @@ -128,7 +128,7 @@ def test_append_series(setup_path):
# select on the index and values
expected = ns[(ns > 70) & (ns.index < 90)]
result = store.select("ns", "foo>70 and index<90")
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected, check_index_type=True)

# multi-index
mi = DataFrame(np.random.randn(5, 1), columns=["A"])
Expand All @@ -139,7 +139,7 @@ def test_append_series(setup_path):
s = mi.stack()
s.index = s.index.droplevel(2)
store.append("mi", s)
tm.assert_series_equal(store["mi"], s)
tm.assert_series_equal(store["mi"], s, check_index_type=True)


def test_append_some_nans(setup_path):
Expand All @@ -162,31 +162,31 @@ def test_append_some_nans(setup_path):
df.loc[0:15, ["A1", "B", "D", "E"]] = np.nan
store.append("df1", df[:10])
store.append("df1", df[10:])
tm.assert_frame_equal(store["df1"], df)
tm.assert_frame_equal(store["df1"], df, check_index_type=True)

# first column
df1 = df.copy()
df1["A1"] = np.nan
_maybe_remove(store, "df1")
store.append("df1", df1[:10])
store.append("df1", df1[10:])
tm.assert_frame_equal(store["df1"], df1)
tm.assert_frame_equal(store["df1"], df1, check_index_type=True)

# 2nd column
df2 = df.copy()
df2["A2"] = np.nan
_maybe_remove(store, "df2")
store.append("df2", df2[:10])
store.append("df2", df2[10:])
tm.assert_frame_equal(store["df2"], df2)
tm.assert_frame_equal(store["df2"], df2, check_index_type=True)

# datetimes
df3 = df.copy()
df3["E"] = np.nan
_maybe_remove(store, "df3")
store.append("df3", df3[:10])
store.append("df3", df3[10:])
tm.assert_frame_equal(store["df3"], df3)
tm.assert_frame_equal(store["df3"], df3, check_index_type=True)


def test_append_all_nans(setup_path):
Expand All @@ -203,13 +203,13 @@ def test_append_all_nans(setup_path):
_maybe_remove(store, "df")
store.append("df", df[:10], dropna=True)
store.append("df", df[10:], dropna=True)
tm.assert_frame_equal(store["df"], df[-4:])
tm.assert_frame_equal(store["df"], df[-4:], check_index_type=True)

# nan some entire rows (dropna=False)
_maybe_remove(store, "df2")
store.append("df2", df[:10], dropna=False)
store.append("df2", df[10:], dropna=False)
tm.assert_frame_equal(store["df2"], df)
tm.assert_frame_equal(store["df2"], df, check_index_type=True)

# tests the option io.hdf.dropna_table
with pd.option_context("io.hdf.dropna_table", False):
Expand Down Expand Up @@ -240,12 +240,12 @@ def test_append_all_nans(setup_path):
_maybe_remove(store, "df")
store.append("df", df[:10], dropna=True)
store.append("df", df[10:], dropna=True)
tm.assert_frame_equal(store["df"], df)
tm.assert_frame_equal(store["df"], df, check_index_type=True)

_maybe_remove(store, "df2")
store.append("df2", df[:10], dropna=False)
store.append("df2", df[10:], dropna=False)
tm.assert_frame_equal(store["df2"], df)
tm.assert_frame_equal(store["df2"], df, check_index_type=True)

# nan some entire rows (but since we have dates they are still
# written!)
Expand All @@ -266,12 +266,12 @@ def test_append_all_nans(setup_path):
_maybe_remove(store, "df")
store.append("df", df[:10], dropna=True)
store.append("df", df[10:], dropna=True)
tm.assert_frame_equal(store["df"], df)
tm.assert_frame_equal(store["df"], df, check_index_type=True)

_maybe_remove(store, "df2")
store.append("df2", df[:10], dropna=False)
store.append("df2", df[10:], dropna=False)
tm.assert_frame_equal(store["df2"], df)
tm.assert_frame_equal(store["df2"], df, check_index_type=True)


def test_append_frame_column_oriented(setup_path):
Expand Down Expand Up @@ -882,7 +882,7 @@ def test_append_to_multiple_dropna(setup_path):
)
result = store.select_as_multiple(["df1", "df2"])
expected = df.dropna()
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected, check_index_type=True)
tm.assert_index_equal(store.select("df1").index, store.select("df2").index)


Expand Down Expand Up @@ -932,4 +932,4 @@ def test_append_to_multiple_min_itemsize(setup_path):
min_itemsize={"Str": 10, "LongStr": 100, "Num": 2},
)
result = store.select_as_multiple(["index", "nums", "strs"])
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected, check_index_type=True)
8 changes: 4 additions & 4 deletions pandas/tests/io/pytables/test_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,25 +51,25 @@ def test_read_complete(self, pytables_hdf5_file):
path, objname, df = pytables_hdf5_file
result = pd.read_hdf(path, key=objname)
expected = df
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected, check_index_type=True)

def test_read_with_start(self, pytables_hdf5_file):
path, objname, df = pytables_hdf5_file
# This is a regression test for pandas-dev/pandas/issues/11188
result = pd.read_hdf(path, key=objname, start=1)
expected = df[1:].reset_index(drop=True)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected, check_index_type=True)

def test_read_with_stop(self, pytables_hdf5_file):
path, objname, df = pytables_hdf5_file
# This is a regression test for pandas-dev/pandas/issues/11188
result = pd.read_hdf(path, key=objname, stop=1)
expected = df[:1].reset_index(drop=True)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected, check_index_type=True)

def test_read_with_startstop(self, pytables_hdf5_file):
path, objname, df = pytables_hdf5_file
# This is a regression test for pandas-dev/pandas/issues/11188
result = pd.read_hdf(path, key=objname, start=1, stop=2)
expected = df[1:2].reset_index(drop=True)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected, check_index_type=True)