diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 37032ff6bc313..9dc10a09378f8 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -138,6 +138,7 @@ Other Enhancements - :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`) - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`) - :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) +- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) .. _whatsnew_0220.api_breaking: diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 292b0f638f821..cb786574909db 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -544,9 +544,31 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): return cls.from_arrays(left, right, closed, name=name, copy=False) - def to_tuples(self): - """Return an Index of tuples of the form (left, right)""" - return Index(_asarray_tuplesafe(zip(self.left, self.right))) + def to_tuples(self, na_tuple=True): + """ + Return an Index of tuples of the form (left, right) + + Parameters + ---------- + na_tuple : boolean, default True + Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA + value itself if False, ``nan``. 
+ + .. versionadded:: 0.22.0 + + Examples + -------- + >>> idx = pd.IntervalIndex.from_arrays([0, np.nan, 2], [1, np.nan, 3]) + >>> idx.to_tuples() + Index([(0.0, 1.0), (nan, nan), (2.0, 3.0)], dtype='object') + >>> idx.to_tuples(na_tuple=False) + Index([(0.0, 1.0), nan, (2.0, 3.0)], dtype='object') + """ + tuples = _asarray_tuplesafe(zip(self.left, self.right)) + if not na_tuple: + # GH 18756 + tuples = np.where(~self._isnan, tuples, np.nan) + return Index(tuples) @cache_readonly def _multiindex(self): diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index abad930793d7f..c809127a66ab8 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -7,6 +7,7 @@ Interval, IntervalIndex, Index, isna, notna, interval_range, Timestamp, Timedelta, compat, date_range, timedelta_range, DateOffset) from pandas.compat import lzip +from pandas.core.common import _asarray_tuplesafe from pandas.tseries.offsets import Day from pandas._libs.interval import IntervalTree from pandas.tests.indexes.common import Base @@ -1072,6 +1073,45 @@ def test_is_non_overlapping_monotonic(self, closed): idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True + @pytest.mark.parametrize('tuples', [ + lzip(range(10), range(1, 11)), + lzip(date_range('20170101', periods=10), + date_range('20170101', periods=10)), + lzip(timedelta_range('0 days', periods=10), + timedelta_range('1 day', periods=10))]) + def test_to_tuples(self, tuples): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples() + expected = Index(_asarray_tuplesafe(tuples)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('tuples', [ + lzip(range(10), range(1, 11)) + [np.nan], + lzip(date_range('20170101', periods=10), + date_range('20170101', periods=10)) + [np.nan], + lzip(timedelta_range('0 days', periods=10), + timedelta_range('1 day', periods=10)) + [np.nan]]) +
@pytest.mark.parametrize('na_tuple', [True, False]) + def test_to_tuples_na(self, tuples, na_tuple): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples(na_tuple=na_tuple) + + # check the non-NA portion + expected_notna = Index(_asarray_tuplesafe(tuples[:-1])) + result_notna = result[:-1] + tm.assert_index_equal(result_notna, expected_notna) + + # check the NA portion + result_na = result[-1] + if na_tuple: + assert isinstance(result_na, tuple) + assert len(result_na) == 2 + assert all(isna(x) for x in result_na) + else: + assert isna(result_na) + class TestIntervalRange(object):