Skip to content

Commit b0d8ff8

Browse files
[ArrayManager] TST: get tests running for /tests/frame
1 parent e19403d commit b0d8ff8

10 files changed

+126
-15
lines changed

.github/workflows/ci.yml

+1
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ jobs:
153153
run: |
154154
source activate pandas-dev
155155
pytest pandas/tests/frame/methods --array-manager
156+
pytest pandas/tests/frame/test_* --array-manager -k "not test_reductions"
156157
pytest pandas/tests/arithmetic/ --array-manager
157158
pytest pandas/tests/reshape/merge --array-manager
158159

pandas/conftest.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -407,11 +407,14 @@ def __len__(self):
407407
# Indices
408408
# ----------------------------------------------------------------
409409
@pytest.fixture
410-
def multiindex_year_month_day_dataframe_random_data():
410+
def multiindex_year_month_day_dataframe_random_data(using_array_manager):
411411
"""
412412
DataFrame with 3 level MultiIndex (year, month, day) covering
413413
first 100 business days from 2000-01-01 with random data
414414
"""
415+
if using_array_manager:
416+
# TODO(ArrayManager) groupby
417+
pytest.skip("Not yet implemented for ArrayManager")
415418
tdf = tm.makeTimeDataFrame(100)
416419
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
417420
# use Int64Index, to make sure things work

pandas/core/internals/array_manager.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,13 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
741741

742742
value = extract_array(value, extract_numpy=True)
743743
if value.ndim == 2:
744-
value = value[0, :]
744+
if value.shape[0] == 1:
745+
value = value[0, :]
746+
else:
747+
raise ValueError(
748+
f"expected 1D array, got array with shape {value.shape}"
749+
)
750+
745751
# TODO self.arrays can be empty
746752
# assert len(value) == len(self.arrays[0])
747753

pandas/tests/frame/test_arithmetic.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import pytest
88
import pytz
99

10+
import pandas.util._test_decorators as td
11+
1012
import pandas as pd
1113
from pandas import (
1214
DataFrame,
@@ -686,6 +688,7 @@ def test_df_add_2d_array_collike_broadcasts(self):
686688
result = collike + df
687689
tm.assert_frame_equal(result, expected)
688690

691+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes
689692
def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
690693
# GH#23000
691694
opname = all_arithmetic_operators
@@ -707,6 +710,7 @@ def test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators):
707710
result = getattr(df, opname)(rowlike)
708711
tm.assert_frame_equal(result, expected)
709712

713+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) decide on dtypes
710714
def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators):
711715
# GH#23000
712716
opname = all_arithmetic_operators
@@ -1351,7 +1355,7 @@ def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne)
13511355

13521356
def test_comparison_protected_from_errstate(self):
13531357
missing_df = tm.makeDataFrame()
1354-
missing_df.iloc[0]["A"] = np.nan
1358+
missing_df.loc[missing_df.index[0], "A"] = np.nan
13551359
with np.errstate(invalid="ignore"):
13561360
expected = missing_df.values < 0
13571361
with np.errstate(invalid="raise"):

pandas/tests/frame/test_block_internals.py

+6
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pytest
1010

1111
from pandas.errors import PerformanceWarning
12+
import pandas.util._test_decorators as td
1213

1314
import pandas as pd
1415
from pandas import (
@@ -30,6 +31,11 @@
3031
# structure
3132

3233

34+
# TODO(ArrayManager) check which of those tests need to be rewritten to test the
35+
# equivalent for ArrayManager
36+
pytestmark = td.skip_array_manager_invalid_test
37+
38+
3339
class TestDataFrameBlockInternals:
3440
def test_setitem_invalidates_datetime_index_freq(self):
3541
# GH#24096 altering a datetime64tz column inplace invalidates the

pandas/tests/frame/test_constructors.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pytz
1919

2020
from pandas.compat import np_version_under1p19
21+
import pandas.util._test_decorators as td
2122

2223
from pandas.core.dtypes.common import is_integer_dtype
2324
from pandas.core.dtypes.dtypes import (
@@ -159,7 +160,10 @@ def test_constructor_cast_failure(self):
159160
df["foo"] = np.ones((4, 2)).tolist()
160161

161162
# this is not ok
162-
msg = "Wrong number of items passed 2, placement implies 1"
163+
msg = (
164+
"Wrong number of items passed 2, placement implies 1"
165+
"|expected 1D array, got array"
166+
)
163167
with pytest.raises(ValueError, match=msg):
164168
df["test"] = np.ones((4, 2))
165169

@@ -174,12 +178,15 @@ def test_constructor_dtype_copy(self):
174178
new_df["col1"] = 200.0
175179
assert orig_df["col1"][0] == 1.0
176180

177-
def test_constructor_dtype_nocast_view(self):
181+
def test_constructor_dtype_nocast_view_dataframe(self):
178182
df = DataFrame([[1, 2]])
179183
should_be_view = DataFrame(df, dtype=df[0].dtype)
180184
should_be_view[0][0] = 99
181185
assert df.values[0, 0] == 99
182186

187+
@td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array?
188+
def test_constructor_dtype_nocast_view_2d_array(self):
189+
df = DataFrame([[1, 2]])
183190
should_be_view = DataFrame(df.values, dtype=df[0].dtype)
184191
should_be_view[0][0] = 97
185192
assert df.values[0, 0] == 97
@@ -1931,6 +1938,7 @@ def test_constructor_frame_copy(self, float_frame):
19311938
assert (cop["A"] == 5).all()
19321939
assert not (float_frame["A"] == 5).all()
19331940

1941+
@td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array?
19341942
def test_constructor_ndarray_copy(self, float_frame):
19351943
df = DataFrame(float_frame.values)
19361944

pandas/tests/frame/test_nonunique_indexes.py

+61-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
import pandas.util._test_decorators as td
5+
46
import pandas as pd
57
from pandas import (
68
DataFrame,
@@ -18,6 +20,9 @@ def check(result, expected=None):
1820

1921

2022
class TestDataFrameNonuniqueIndexes:
23+
24+
# TODO(ArrayManager) iset with multiple elements not yet implemented
25+
@td.skip_array_manager_not_yet_implemented
2126
def test_setattr_columns_vs_construct_with_columns(self):
2227

2328
# assignment
@@ -234,7 +239,59 @@ def test_column_dups_dropna(self):
234239
result = df.dropna(subset=["A", "C"], how="all")
235240
tm.assert_frame_equal(result, expected)
236241

242+
<<<<<<< HEAD
237243
def test_dup_columns_comparisons(self):
244+
=======
245+
def test_getitem_boolean_series_with_duplicate_columns(self):
246+
# boolean indexing
247+
# GH 4879
248+
dups = ["A", "A", "C", "D"]
249+
df = DataFrame(
250+
np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
251+
)
252+
expected = df[df.C > 6]
253+
expected.columns = dups
254+
df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
255+
result = df[df.C > 6]
256+
check(result, expected)
257+
258+
def test_getitem_boolean_frame_with_duplicate_columns(self):
259+
dups = ["A", "A", "C", "D"]
260+
261+
# where
262+
df = DataFrame(
263+
np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
264+
)
265+
# `df > 6` is a DataFrame with the same shape+alignment as df
266+
expected = df[df > 6]
267+
expected.columns = dups
268+
df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
269+
result = df[df > 6]
270+
check(result, expected)
271+
272+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) duplicate indices
273+
# fix error message
274+
def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self):
275+
# `df.A > 6` is a DataFrame with a different shape from df
276+
dups = ["A", "A", "C", "D"]
277+
278+
# boolean with the duplicate raises
279+
df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
280+
msg = "cannot reindex from a duplicate axis"
281+
with pytest.raises(ValueError, match=msg):
282+
df[df.A > 6]
283+
284+
def test_column_dups_indexing(self):
285+
286+
# dup aligning operations should work
287+
# GH 5185
288+
df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3])
289+
df2 = DataFrame([1, 2, 3], index=[1, 2, 3])
290+
expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3])
291+
result = df1.sub(df2)
292+
tm.assert_frame_equal(result, expected)
293+
294+
>>>>>>> ec83091284... [ArrayManager] TST: get tests running for /tests/frame
238295
# equality
239296
df1 = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]], columns=["A", "B"])
240297
df2 = DataFrame([[0, 1], [2, 4], [2, np.nan], [4, 5]], columns=["A", "A"])
@@ -286,7 +343,7 @@ def test_multi_axis_dups(self):
286343
result = z.loc[["a", "c", "a"]]
287344
check(result, expected)
288345

289-
def test_columns_with_dups(self):
346+
def test_columns_with_dups(self, using_array_manager):
290347
# GH 3468 related
291348

292349
# basic
@@ -341,8 +398,9 @@ def test_dups_across_blocks(self):
341398
)
342399
df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1)
343400

344-
assert len(df._mgr.blknos) == len(df.columns)
345-
assert len(df._mgr.blklocs) == len(df.columns)
401+
if not using_array_manager:
402+
assert len(df._mgr.blknos) == len(df.columns)
403+
assert len(df._mgr.blklocs) == len(df.columns)
346404

347405
# testing iloc
348406
for i in range(len(df.columns)):

pandas/tests/frame/test_repr_info.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,16 @@
2626

2727

2828
class TestDataFrameReprInfoEtc:
29-
def test_repr_bytes_61_lines(self):
29+
def test_repr_bytes_61_lines(self, using_array_manager):
3030
# GH#12857
3131
lets = list("ACDEFGHIJKLMNOP")
3232
slen = 50
3333
nseqs = 1000
3434
words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)]
3535
df = DataFrame(words).astype("U1")
36-
assert (df.dtypes == object).all()
36+
# TODO(ArrayManager) astype("U1") actually gives the "U1" dtype instead of object
37+
if not using_array_manager:
38+
assert (df.dtypes == object).all()
3739

3840
# smoke tests; at one point this raised with 61 but not 60
3941
repr(df)

pandas/tests/frame/test_stack_unstack.py

+27-5
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import numpy as np
66
import pytest
77

8+
import pandas.util._test_decorators as td
9+
810
import pandas as pd
911
from pandas import (
1012
DataFrame,
@@ -60,12 +62,13 @@ def test_stack_mixed_level(self):
6062
expected = expected[["a", "b"]]
6163
tm.assert_frame_equal(result, expected)
6264

63-
def test_unstack_not_consolidated(self):
65+
def test_unstack_not_consolidated(self, using_array_manager):
6466
# GH#34708
6567
df = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]})
6668
df2 = df[["x"]]
6769
df2["y"] = df["y"]
68-
assert len(df2._mgr.blocks) == 2
70+
if not using_array_manager:
71+
assert len(df2._mgr.blocks) == 2
6972

7073
res = df2.unstack()
7174
expected = df.unstack()
@@ -118,6 +121,8 @@ def test_unstack_fill(self):
118121
expected = unstacked["w"]
119122
tm.assert_frame_equal(result, expected)
120123

124+
# TODO(ArrayManager) iset with multiple elements not yet implemented
125+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) iset
121126
def test_unstack_fill_frame(self):
122127

123128
# From a dataframe
@@ -747,7 +752,8 @@ def test_unstack_multi_level_rows_and_cols(self):
747752
expected = df.unstack(["i3"]).unstack(["i2"])
748753
tm.assert_frame_equal(result, expected)
749754

750-
def test_unstack_nan_index(self): # GH7466
755+
def test_unstack_nan_index1(self):
756+
# GH7466
751757
def cast(val):
752758
val_str = "" if val != val else val
753759
return f"{val_str:1}"
@@ -833,6 +839,7 @@ def verify(df):
833839
for col in ["4th", "5th"]:
834840
verify(udf[col])
835841

842+
def test_unstack_nan_index2(self):
836843
# GH7403
837844
df = DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)})
838845
df.iloc[3, 1] = np.NaN
@@ -875,6 +882,7 @@ def verify(df):
875882
right = DataFrame(vals, columns=cols, index=idx)
876883
tm.assert_frame_equal(left, right)
877884

885+
def test_unstack_nan_index3(self, using_array_manager):
878886
# GH7401
879887
df = DataFrame(
880888
{
@@ -896,8 +904,13 @@ def verify(df):
896904
)
897905

898906
right = DataFrame(vals, columns=cols, index=idx)
907+
if using_array_manager:
908+
# with ArrayManager preserve dtype where possible
909+
cols = right.columns[[1, 2, 3, 5]]
910+
right[cols] = right[cols].astype("int64")
899911
tm.assert_frame_equal(left, right)
900912

913+
def test_unstack_nan_index4(self):
901914
# GH4862
902915
vals = [
903916
["Hg", np.nan, np.nan, 680585148],
@@ -938,6 +951,8 @@ def verify(df):
938951
left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"])
939952
tm.assert_frame_equal(left.unstack(), right)
940953

954+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) MultiIndex bug
955+
def test_unstack_nan_index5(self):
941956
# GH9497 - multiple unstack with nulls
942957
df = DataFrame(
943958
{
@@ -1453,6 +1468,7 @@ def test_stack_mixed_dtype(self, multiindex_dataframe_random_data):
14531468
assert result.name is None
14541469
assert stacked["bar"].dtype == np.float_
14551470

1471+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby
14561472
def test_unstack_bug(self):
14571473
df = DataFrame(
14581474
{
@@ -1689,6 +1705,7 @@ def test_unstack_period_frame(self):
16891705

16901706
tm.assert_frame_equal(result3, expected)
16911707

1708+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby
16921709
def test_stack_multiple_bug(self):
16931710
# bug when some uniques are not present in the data GH#3170
16941711
id_col = ([1] * 3) + ([2] * 3)
@@ -1887,7 +1904,7 @@ def test_unstack_group_index_overflow(self):
18871904
result = s.unstack(4)
18881905
assert result.shape == (500, 2)
18891906

1890-
def test_unstack_with_missing_int_cast_to_float(self):
1907+
def test_unstack_with_missing_int_cast_to_float(self, using_array_manager):
18911908
# https://github.com/pandas-dev/pandas/issues/37115
18921909
df = DataFrame(
18931910
{
@@ -1899,7 +1916,8 @@ def test_unstack_with_missing_int_cast_to_float(self):
18991916

19001917
# add another int column to get 2 blocks
19011918
df["is_"] = 1
1902-
assert len(df._mgr.blocks) == 2
1919+
if not using_array_manager:
1920+
assert len(df._mgr.blocks) == 2
19031921

19041922
result = df.unstack("b")
19051923
result[("is_", "ca")] = result[("is_", "ca")].fillna(0)
@@ -1912,6 +1930,10 @@ def test_unstack_with_missing_int_cast_to_float(self):
19121930
names=[None, "b"],
19131931
),
19141932
)
1933+
if using_array_manager:
1934+
# with ArrayManager preserve dtype where possible
1935+
expected[("v", "cb")] = expected[("v", "cb")].astype("int64")
1936+
expected[("is_", "cb")] = expected[("is_", "cb")].astype("int64")
19151937
tm.assert_frame_equal(result, expected)
19161938

19171939
def test_unstack_with_level_has_nan(self):

pandas/tests/frame/test_subclass.py

+1
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,7 @@ def test_idxmax_preserves_subclass(self):
702702
result = df.idxmax()
703703
assert isinstance(result, tm.SubclassedSeries)
704704

705+
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) equals
705706
def test_equals_subclass(self):
706707
# https://github.com/pandas-dev/pandas/pull/34402
707708
# allow subclass in both directions

0 commit comments

Comments
 (0)