Skip to content

Commit 968c7f1

Browse files
committed
DOC/TST: change to use parameterization
1 parent 9e39794 commit 968c7f1

File tree

3 files changed

+145
-216
lines changed

3 files changed

+145
-216
lines changed

doc/source/whatsnew/v0.20.0.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,7 @@ Bug Fixes
10231023

10241024
- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which caused incorrectly formatted files to be produced for some locales (:issue:`13856`)
10251025
- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`)
1026+
- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`)
10261027

10271028
- Bug in ``pd.concat()`` in which concatenating with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`)
10281029
- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`)
@@ -1038,4 +1039,3 @@ Bug Fixes
10381039
- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
10391040
- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
10401041
- Bug in ``pd.read_msgpack`` which did not allow loading a dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`)
1041-
- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`)

pandas/tests/frame/test_join.py

Lines changed: 104 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -1,171 +1,140 @@
11
# -*- coding: utf-8 -*-
22

3-
from __future__ import print_function
4-
3+
import pytest
54
import numpy as np
65

7-
import pandas as pd
8-
6+
from pandas import DataFrame, Index
97
from pandas.tests.frame.common import TestData
10-
118
import pandas.util.testing as tm
129

1310

14-
class TestDataFrameJoin(TestData):
15-
16-
def test_join(self):
17-
df1 = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
18-
df2 = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])
11+
@pytest.fixture
12+
def frame():
13+
return TestData().frame
14+
15+
16+
@pytest.fixture
17+
def df1():
18+
return DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
19+
20+
21+
@pytest.fixture
22+
def df2():
23+
return DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])
24+
25+
26+
@pytest.mark.parametrize(
27+
"how, sort, expected",
28+
[('inner', False, DataFrame({'a': [20, 10],
29+
'b': [200, 100]},
30+
index=[2, 1])),
31+
('inner', True, DataFrame({'a': [10, 20],
32+
'b': [100, 200]},
33+
index=[1, 2])),
34+
('left', False, DataFrame({'a': [20, 10, 0],
35+
'b': [200, 100, np.nan]},
36+
index=[2, 1, 0])),
37+
('left', True, DataFrame({'a': [0, 10, 20],
38+
'b': [np.nan, 100, 200]},
39+
index=[0, 1, 2])),
40+
('right', False, DataFrame({'a': [10, 20, np.nan],
41+
'b': [100, 200, 300]},
42+
index=[1, 2, 3])),
43+
('right', True, DataFrame({'a': [10, 20, np.nan],
44+
'b': [100, 200, 300]},
45+
index=[1, 2, 3])),
46+
('outer', False, DataFrame({'a': [0, 10, 20, np.nan],
47+
'b': [np.nan, 100, 200, 300]},
48+
index=[0, 1, 2, 3])),
49+
('outer', True, DataFrame({'a': [0, 10, 20, np.nan],
50+
'b': [np.nan, 100, 200, 300]},
51+
index=[0, 1, 2, 3]))])
52+
def test_join(df1, df2, how, sort, expected):
1953

20-
# default how='left'
21-
result = df1.join(df2)
22-
expected = pd.DataFrame({'a': [20, 10, 0], 'b': [200, 100, np.nan]},
23-
index=[2, 1, 0])
24-
tm.assert_frame_equal(result, expected)
54+
result = df1.join(df2, how=how, sort=sort)
55+
tm.assert_frame_equal(result, expected)
2556

26-
# how='left'
27-
result = df1.join(df2, how='left')
28-
expected = pd.DataFrame({'a': [20, 10, 0], 'b': [200, 100, np.nan]},
29-
index=[2, 1, 0])
30-
tm.assert_frame_equal(result, expected)
3157

32-
# how='right'
33-
result = df1.join(df2, how='right')
34-
expected = pd.DataFrame({'a': [10, 20, np.nan], 'b': [100, 200, 300]},
35-
index=[1, 2, 3])
36-
tm.assert_frame_equal(result, expected)
58+
def test_join_index(frame):
59+
# left / right
3760

38-
# how='inner'
39-
result = df1.join(df2, how='inner')
40-
expected = pd.DataFrame({'a': [20, 10], 'b': [200, 100]},
41-
index=[2, 1])
42-
tm.assert_frame_equal(result, expected)
61+
f = frame.loc[frame.index[:10], ['A', 'B']]
62+
f2 = frame.loc[frame.index[5:], ['C', 'D']].iloc[::-1]
4363

44-
# how='outer'
45-
result = df1.join(df2, how='outer')
46-
expected = pd.DataFrame({'a': [0, 10, 20, np.nan],
47-
'b': [np.nan, 100, 200, 300]},
48-
index=[0, 1, 2, 3])
49-
tm.assert_frame_equal(result, expected)
50-
51-
def test_join_sort(self):
52-
df1 = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0])
53-
df2 = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3])
54-
55-
# default how='left'
56-
result = df1.join(df2, sort=True)
57-
expected = pd.DataFrame({'a': [0, 10, 20], 'b': [np.nan, 100, 200]},
58-
index=[0, 1, 2])
59-
tm.assert_frame_equal(result, expected)
60-
61-
# how='left'
62-
result = df1.join(df2, how='left', sort=True)
63-
expected = pd.DataFrame({'a': [0, 10, 20], 'b': [np.nan, 100, 200]},
64-
index=[0, 1, 2])
65-
tm.assert_frame_equal(result, expected)
66-
67-
# how='right' (already sorted)
68-
result = df1.join(df2, how='right', sort=True)
69-
expected = pd.DataFrame({'a': [10, 20, np.nan], 'b': [100, 200, 300]},
70-
index=[1, 2, 3])
71-
tm.assert_frame_equal(result, expected)
72-
73-
# how='right'
74-
result = df2.join(df1, how='right', sort=True)
75-
expected = pd.DataFrame([[np.nan, 0], [100, 10], [200, 20]],
76-
columns=['b', 'a'], index=[0, 1, 2])
77-
tm.assert_frame_equal(result, expected)
78-
79-
# how='inner'
80-
result = df1.join(df2, how='inner', sort=True)
81-
expected = pd.DataFrame({'a': [10, 20], 'b': [100, 200]},
82-
index=[1, 2])
83-
tm.assert_frame_equal(result, expected)
84-
85-
# how='outer'
86-
result = df1.join(df2, how='outer', sort=True)
87-
expected = pd.DataFrame({'a': [0, 10, 20, np.nan],
88-
'b': [np.nan, 100, 200, 300]},
89-
index=[0, 1, 2, 3])
90-
tm.assert_frame_equal(result, expected)
64+
joined = f.join(f2)
65+
tm.assert_index_equal(f.index, joined.index)
66+
expected_columns = Index(['A', 'B', 'C', 'D'])
67+
tm.assert_index_equal(joined.columns, expected_columns)
9168

92-
def test_join_index(self):
93-
# left / right
69+
joined = f.join(f2, how='left')
70+
tm.assert_index_equal(joined.index, f.index)
71+
tm.assert_index_equal(joined.columns, expected_columns)
9472

95-
f = self.frame.loc[self.frame.index[:10], ['A', 'B']]
96-
f2 = self.frame.loc[self.frame.index[5:], ['C', 'D']].iloc[::-1]
73+
joined = f.join(f2, how='right')
74+
tm.assert_index_equal(joined.index, f2.index)
75+
tm.assert_index_equal(joined.columns, expected_columns)
9776

98-
joined = f.join(f2)
99-
tm.assert_index_equal(f.index, joined.index)
100-
expected_columns = pd.Index(['A', 'B', 'C', 'D'])
101-
tm.assert_index_equal(joined.columns, expected_columns)
77+
# inner
10278

103-
joined = f.join(f2, how='left')
104-
tm.assert_index_equal(joined.index, f.index)
105-
tm.assert_index_equal(joined.columns, expected_columns)
79+
joined = f.join(f2, how='inner')
80+
tm.assert_index_equal(joined.index, f.index[5:10])
81+
tm.assert_index_equal(joined.columns, expected_columns)
10682

107-
joined = f.join(f2, how='right')
108-
tm.assert_index_equal(joined.index, f2.index)
109-
tm.assert_index_equal(joined.columns, expected_columns)
83+
# outer
11084

111-
# inner
85+
joined = f.join(f2, how='outer')
86+
tm.assert_index_equal(joined.index, frame.index.sort_values())
87+
tm.assert_index_equal(joined.columns, expected_columns)
11288

113-
joined = f.join(f2, how='inner')
114-
tm.assert_index_equal(joined.index, f.index[5:10])
115-
tm.assert_index_equal(joined.columns, expected_columns)
89+
tm.assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo')
11690

117-
# outer
91+
# corner case - overlapping columns
92+
for how in ('outer', 'left', 'inner'):
93+
with tm.assertRaisesRegexp(ValueError, 'columns overlap but '
94+
'no suffix'):
95+
frame.join(frame, how=how)
11896

119-
joined = f.join(f2, how='outer')
120-
tm.assert_index_equal(joined.index, self.frame.index.sort_values())
121-
tm.assert_index_equal(joined.columns, expected_columns)
12297

123-
tm.assertRaisesRegexp(ValueError, 'join method', f.join, f2, how='foo')
98+
def test_join_index_more(frame):
99+
af = frame.loc[:, ['A', 'B']]
100+
bf = frame.loc[::2, ['C', 'D']]
124101

125-
# corner case - overlapping columns
126-
for how in ('outer', 'left', 'inner'):
127-
with tm.assertRaisesRegexp(ValueError, 'columns overlap but '
128-
'no suffix'):
129-
self.frame.join(self.frame, how=how)
102+
expected = af.copy()
103+
expected['C'] = frame['C'][::2]
104+
expected['D'] = frame['D'][::2]
130105

131-
def test_join_index_more(self):
132-
af = self.frame.loc[:, ['A', 'B']]
133-
bf = self.frame.loc[::2, ['C', 'D']]
106+
result = af.join(bf)
107+
tm.assert_frame_equal(result, expected)
134108

135-
expected = af.copy()
136-
expected['C'] = self.frame['C'][::2]
137-
expected['D'] = self.frame['D'][::2]
109+
result = af.join(bf, how='right')
110+
tm.assert_frame_equal(result, expected[::2])
138111

139-
result = af.join(bf)
140-
tm.assert_frame_equal(result, expected)
112+
result = bf.join(af, how='right')
113+
tm.assert_frame_equal(result, expected.loc[:, result.columns])
141114

142-
result = af.join(bf, how='right')
143-
tm.assert_frame_equal(result, expected[::2])
144115

145-
result = bf.join(af, how='right')
146-
tm.assert_frame_equal(result, expected.loc[:, result.columns])
116+
def test_join_index_series(frame):
117+
df = frame.copy()
118+
s = df.pop(frame.columns[-1])
119+
joined = df.join(s)
147120

148-
def test_join_index_series(self):
149-
df = self.frame.copy()
150-
s = df.pop(self.frame.columns[-1])
151-
joined = df.join(s)
121+
# TODO should this check_names ?
122+
tm.assert_frame_equal(joined, frame, check_names=False)
152123

153-
# TODO should this check_names ?
154-
tm.assert_frame_equal(joined, self.frame, check_names=False)
124+
s.name = None
125+
tm.assertRaisesRegexp(ValueError, 'must have a name', df.join, s)
155126

156-
s.name = None
157-
tm.assertRaisesRegexp(ValueError, 'must have a name', df.join, s)
158127

159-
def test_join_overlap(self):
160-
df1 = self.frame.loc[:, ['A', 'B', 'C']]
161-
df2 = self.frame.loc[:, ['B', 'C', 'D']]
128+
def test_join_overlap(frame):
129+
df1 = frame.loc[:, ['A', 'B', 'C']]
130+
df2 = frame.loc[:, ['B', 'C', 'D']]
162131

163-
joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2')
164-
df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1')
165-
df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2')
132+
joined = df1.join(df2, lsuffix='_df1', rsuffix='_df2')
133+
df1_suf = df1.loc[:, ['B', 'C']].add_suffix('_df1')
134+
df2_suf = df2.loc[:, ['B', 'C']].add_suffix('_df2')
166135

167-
no_overlap = self.frame.loc[:, ['A', 'D']]
168-
expected = df1_suf.join(df2_suf).join(no_overlap)
136+
no_overlap = frame.loc[:, ['A', 'D']]
137+
expected = df1_suf.join(df2_suf).join(no_overlap)
169138

170-
# column order not necessarily sorted
171-
tm.assert_frame_equal(joined, expected.loc[:, joined.columns])
139+
# column order not necessarily sorted
140+
tm.assert_frame_equal(joined, expected.loc[:, joined.columns])

0 commit comments

Comments
 (0)