Skip to content

BUG: fix missing sort keyword for PeriodIndex.join #16586

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 7, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.3.txt
Original file line number Diff line number Diff line change
@@ -82,7 +82,7 @@ Sparse

Reshaping
^^^^^^^^^

- ``PeriodIndex`` / ``TimedeltaIndex.join`` was missing the ``sort=`` kwarg (:issue:`16541`)
- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`).
- Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`)

2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
@@ -3126,7 +3126,7 @@ def _join_non_unique(self, other, how='left', return_indexers=False):
left_idx = _ensure_platform_int(left_idx)
right_idx = _ensure_platform_int(right_idx)

join_index = self.values.take(left_idx)
join_index = np.asarray(self.values.take(left_idx))
mask = left_idx == -1
np.putmask(join_index, mask, other._values.take(right_idx))

6 changes: 4 additions & 2 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
@@ -912,14 +912,16 @@ def insert(self, loc, item):
self[loc:].asi8))
return self._shallow_copy(idx)

def join(self, other, how='left', level=None, return_indexers=False):
def join(self, other, how='left', level=None, return_indexers=False,
sort=False):
"""
See Index.join
"""
self._assert_can_do_setop(other)

result = Int64Index.join(self, other, how=how, level=level,
return_indexers=return_indexers)
return_indexers=return_indexers,
sort=sort)

if return_indexers:
result, lidx, ridx = result
6 changes: 4 additions & 2 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
@@ -516,7 +516,8 @@ def union(self, other):
result.freq = to_offset(result.inferred_freq)
return result

def join(self, other, how='left', level=None, return_indexers=False):
def join(self, other, how='left', level=None, return_indexers=False,
sort=False):
"""
See Index.join
"""
@@ -527,7 +528,8 @@ def join(self, other, how='left', level=None, return_indexers=False):
pass

return Index.join(self, other, how=how, level=level,
return_indexers=return_indexers)
return_indexers=return_indexers,
sort=sort)

def _wrap_joined_index(self, joined, other):
name = self.name if self.name == other.name else None
28 changes: 27 additions & 1 deletion pandas/tests/frame/test_join.py
Original file line number Diff line number Diff line change
@@ -3,11 +3,19 @@
import pytest
import numpy as np

from pandas import DataFrame, Index
from pandas import DataFrame, Index, PeriodIndex
from pandas.tests.frame.common import TestData
import pandas.util.testing as tm


@pytest.fixture
def frame_with_period_index():
    """Build a 4x5 DataFrame of the integers 0..19 on an annual PeriodIndex.

    Columns are labelled 'a' through 'e'; the index holds four annual
    periods starting at 2000.
    """
    annual_index = PeriodIndex(start='2000', freq='A', periods=4)
    values = np.arange(20).reshape(4, 5)
    return DataFrame(data=values, columns=list('abcde'), index=annual_index)


@pytest.fixture
def frame():
return TestData().frame
@@ -139,3 +147,21 @@ def test_join_overlap(frame):

# column order not necessarily sorted
tm.assert_frame_equal(joined, expected.loc[:, joined.columns])


def test_join_period_index(frame_with_period_index):
    """Join a PeriodIndex-ed frame with a column-renamed copy of itself.

    The result is expected to carry both sets of columns (original values
    repeated side by side) on the original, unchanged index.
    """
    left = frame_with_period_index
    # Double each column label ('a' -> 'aa') so the two frames do not overlap.
    right = left.rename(columns=lambda x: '{key}{key}'.format(key=x))

    expected = DataFrame(
        data=np.concatenate([left.values] * 2, axis=1),
        columns=left.columns.append(right.columns),
        index=left.index)

    joined = left.join(right)

    tm.assert_frame_equal(joined, expected)
9 changes: 8 additions & 1 deletion pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
@@ -905,7 +905,7 @@ def test_fillna(self):

def test_nulls(self):
# this is really a smoke test for the methods
# as these are adequantely tested for function elsewhere
# as these are adequately tested for function elsewhere

for name, index in self.indices.items():
if len(index) == 0:
@@ -933,3 +933,10 @@ def test_empty(self):
index = self.create_index()
assert not index.empty
assert index[:0].empty

@pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right'])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also add the test from the original issue (it was on a PeriodIndex) as a specific test?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean this test here: #16541 (comment)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean add the code that generated the issue as a test (joining two randomly generated 5x5 DataFrames)? I don't really understand what you're asking me to do.

def test_join_self_unique(self, how):
    """Check that joining a unique index with itself gives back equal values.

    Non-unique indexes are skipped; `how` is the join type under test.
    """
    index = self.create_index()
    if not index.is_unique:
        return
    joined = index.join(index, how=how)
    assert (index == joined).all()
6 changes: 6 additions & 0 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
@@ -773,3 +773,9 @@ def test_map(self):
result = index.map(lambda x: x.ordinal)
exp = Index([x.ordinal for x in index])
tm.assert_index_equal(result, exp)

@pytest.mark.parametrize('how', ['outer', 'inner', 'left', 'right'])
def test_join_self(self, how):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add this to tests/indexes/common.py using self.create_index? Then this should automatically be run for all index types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Non-unique CategoricalIndex was breaking in a non-obvious way in _join_non_unique, so have a look at that fix in particular.

index = period_range('1/1/2000', periods=10)
joined = index.join(index, how=how)
assert index is joined