Skip to content

[ArrayManager] TST: get tests running for /tests/frame #39700

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 5, 2021
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,10 @@ jobs:
PANDAS_DATA_MANAGER: array
run: |
source activate pandas-dev

pytest pandas/tests/frame/methods
pytest pandas/tests/frame/test_constructors.py
pytest pandas/tests/frame/constructors/
pytest pandas/tests/frame/test_*
pytest pandas/tests/frame/test_reductions.py
pytest pandas/tests/reductions/
pytest pandas/tests/generic/test_generic.py
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ def test_attrs(self):
result = df.rename(columns=str)
assert result.attrs == {"version": 1}

@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) setitem (no copy)
@pytest.mark.parametrize("allows_duplicate_labels", [True, False, None])
def test_set_flags(self, allows_duplicate_labels, frame_or_series):
obj = DataFrame({"A": [1, 2]})
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import pytest
import pytz

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -686,6 +688,7 @@ def test_df_add_2d_array_collike_broadcasts(self):
result = collike + df
tm.assert_frame_equal(result, expected)

@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes
def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
# GH#23000
opname = all_arithmetic_operators
Expand All @@ -707,6 +710,7 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
result = getattr(df, opname)(rowlike)
tm.assert_frame_equal(result, expected)

@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes
def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators):
# GH#23000
opname = all_arithmetic_operators
Expand Down Expand Up @@ -1351,7 +1355,7 @@ def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne)

def test_comparison_protected_from_errstate(self):
missing_df = tm.makeDataFrame()
missing_df.iloc[0]["A"] = np.nan
missing_df.loc[missing_df.index[0], "A"] = np.nan
with np.errstate(invalid="ignore"):
expected = missing_df.values < 0
with np.errstate(invalid="raise"):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pytest

from pandas.errors import PerformanceWarning
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Expand All @@ -30,6 +31,11 @@
# structure


# TODO(ArrayManager) check which of those tests need to be rewritten to test the
# equivalent for ArrayManager
pytestmark = td.skip_array_manager_invalid_test


class TestDataFrameBlockInternals:
def test_setitem_invalidates_datetime_index_freq(self):
# GH#24096 altering a datetime64tz column inplace invalidates the
Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/frame/test_nonunique_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def test_multi_dtype2(self):
expected = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a.1", "a.2", "a.3"])
tm.assert_frame_equal(df, expected)

def test_dups_across_blocks(self):
def test_dups_across_blocks(self, using_array_manager):
# dups across blocks
df_float = DataFrame(np.random.randn(10, 3), dtype="float64")
df_int = DataFrame(np.random.randn(10, 3), dtype="int64")
Expand All @@ -302,8 +302,9 @@ def test_dups_across_blocks(self):
)
df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1)

assert len(df._mgr.blknos) == len(df.columns)
assert len(df._mgr.blklocs) == len(df.columns)
if not using_array_manager:
assert len(df._mgr.blknos) == len(df.columns)
assert len(df._mgr.blklocs) == len(df.columns)

# testing iloc
for i in range(len(df.columns)):
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/frame/test_repr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@


class TestDataFrameReprInfoEtc:
def test_repr_bytes_61_lines(self):
def test_repr_bytes_61_lines(self, using_array_manager):
# GH#12857
lets = list("ACDEFGHIJKLMNOP")
slen = 50
nseqs = 1000
words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)]
df = DataFrame(words).astype("U1")
assert (df.dtypes == object).all()
# TODO(ArrayManager) astype("U1") actually gives this dtype instead of object
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure we don't want this behavior, so should we xfail this for now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is not actually testing astype, but rather repr, so I would prefer to run the rest of the test (which actually passes) without having this line error.

Now, I assume we should have an astype-specific test about this as well, that could be xfailed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't directly see a test about it in frame/methods/test_astype.py, so I will add a test for that there. It's not very clear, though, what the expected behaviour should be.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could move the repr calls up and do the dtype assertion at the end.

if not using_array_manager:
assert (df.dtypes == object).all()

# smoke tests; at one point this raised with 61 but not 60
repr(df)
Expand Down
28 changes: 23 additions & 5 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -60,12 +62,13 @@ def test_stack_mixed_level(self):
expected = expected[["a", "b"]]
tm.assert_frame_equal(result, expected)

def test_unstack_not_consolidated(self):
def test_unstack_not_consolidated(self, using_array_manager):
# GH#34708
df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]})
df2 = df[["x"]]
df2["y"] = df["y"]
assert len(df2._mgr.blocks) == 2
if not using_array_manager:
assert len(df2._mgr.blocks) == 2

res = df2.unstack()
expected = df.unstack()
Expand Down Expand Up @@ -747,7 +750,8 @@ def test_unstack_multi_level_rows_and_cols(self):
expected = df.unstack(["i3"]).unstack(["i2"])
tm.assert_frame_equal(result, expected)

def test_unstack_nan_index(self): # GH7466
def test_unstack_nan_index1(self):
# GH7466
def cast(val):
val_str = "" if val != val else val
return f"{val_str:1}"
Expand Down Expand Up @@ -833,6 +837,7 @@ def verify(df):
for col in ["4th", "5th"]:
verify(udf[col])

def test_unstack_nan_index2(self):
# GH7403
df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)})
df.iloc[3, 1] = np.NaN
Expand Down Expand Up @@ -875,6 +880,7 @@ def verify(df):
right = DataFrame(vals, columns=cols, index=idx)
tm.assert_frame_equal(left, right)

def test_unstack_nan_index3(self, using_array_manager):
# GH7401
df = DataFrame(
{
Expand All @@ -896,8 +902,13 @@ def verify(df):
)

right = DataFrame(vals, columns=cols, index=idx)
if using_array_manager:
# INFO(ArrayManager) with ArrayManager preserve dtype where possible
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is INFO(ArrayManager) a pattern I should know?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it was requested by Jeff on one of the previous PRs. It's an explicit comment about behaviour that changed with ArrayManager but is not a TODO (since it's not wrong behaviour that still needs to be fixed).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks

cols = right.columns[[1, 2, 3, 5]]
right[cols] = right[cols].astype("int64")
tm.assert_frame_equal(left, right)

def test_unstack_nan_index4(self):
# GH4862
vals = [
["Hg", np.nan, np.nan, 680585148],
Expand Down Expand Up @@ -938,6 +949,8 @@ def verify(df):
left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"])
tm.assert_frame_equal(left.unstack(), right)

@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) MultiIndex bug
def test_unstack_nan_index5(self):
# GH9497 - multiple unstack with nulls
df = DataFrame(
{
Expand Down Expand Up @@ -1887,7 +1900,7 @@ def test_unstack_group_index_overflow(self):
result = s.unstack(4)
assert result.shape == (500, 2)

def test_unstack_with_missing_int_cast_to_float(self):
def test_unstack_with_missing_int_cast_to_float(self, using_array_manager):
# https://github.com/pandas-dev/pandas/issues/37115
df = DataFrame(
{
Expand All @@ -1899,7 +1912,8 @@ def test_unstack_with_missing_int_cast_to_float(self):

# add another int column to get 2 blocks
df["is_"] = 1
assert len(df._mgr.blocks) == 2
if not using_array_manager:
assert len(df._mgr.blocks) == 2

result = df.unstack("b")
result[("is_", "ca")] = result[("is_", "ca")].fillna(0)
Expand All @@ -1912,6 +1926,10 @@ def test_unstack_with_missing_int_cast_to_float(self):
names=[None, "b"],
),
)
if using_array_manager:
# INFO(ArrayManager) with ArrayManager preserve dtype where possible
expected[("v", "cb")] = expected[("v", "cb")].astype("int64")
expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64")
tm.assert_frame_equal(result, expected)

def test_unstack_with_level_has_nan(self):
Expand Down