From c815d62fdb10312c842814e94a82fc4e8f3c4e6e Mon Sep 17 00:00:00 2001 From: tp Date: Mon, 14 May 2018 01:05:30 +0100 Subject: [PATCH] improved performance of CategoricalIndex.is_monotonic* --- asv_bench/benchmarks/categoricals.py | 20 +++++++++++++++++++ doc/source/whatsnew/v0.23.1.txt | 1 + pandas/core/indexes/category.py | 4 ++-- pandas/tests/indexes/test_category.py | 28 ++++++++++++++++----------- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index ae1d7029217a4..5464e7cba22c3 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -173,3 +173,23 @@ def setup(self, dtype): def time_isin_categorical(self, dtype): self.series.isin(self.sample) + + +class IsMonotonic(object): + + def setup(self): + N = 1000 + self.c = pd.CategoricalIndex(list('a' * N + 'b' * N + 'c' * N)) + self.s = pd.Series(self.c) + + def time_categorical_index_is_monotonic_increasing(self): + self.c.is_monotonic_increasing + + def time_categorical_index_is_monotonic_decreasing(self): + self.c.is_monotonic_decreasing + + def time_categorical_series_is_monotonic_increasing(self): + self.s.is_monotonic_increasing + + def time_categorical_series_is_monotonic_decreasing(self): + self.s.is_monotonic_decreasing diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 5c9c3e2931bd9..8c5111e712a34 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -29,6 +29,7 @@ Deprecations Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Improved performance of :meth:`CategoricalIndex.is_monotonic_increasing`, :meth:`CategoricalIndex.is_monotonic_decreasing` and :meth:`CategoricalIndex.is_monotonic` (:issue:`21025`) - - diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 3ffef5804acf7..78b7ae7054248 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -382,11 +382,11 @@ def is_unique(self): @property def is_monotonic_increasing(self): - return Index(self.codes).is_monotonic_increasing + return self._engine.is_monotonic_increasing @property def is_monotonic_decreasing(self): - return Index(self.codes).is_monotonic_decreasing + return self._engine.is_monotonic_decreasing @Appender(_index_shared_docs['index_unique'] % _index_doc_kwargs) def unique(self, level=None): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 6a1a1a5bdba4f..0e630f69b1a32 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -543,35 +543,41 @@ def test_reindex_empty_index(self): tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) - def test_is_monotonic(self): - c = CategoricalIndex([1, 2, 3]) + @pytest.mark.parametrize('data, non_lexsorted_data', [ + [[1, 2, 3], [9, 0, 1, 2, 3]], + [list('abc'), list('fabcd')], + ]) + def test_is_monotonic(self, data, non_lexsorted_data): + c = CategoricalIndex(data) assert c.is_monotonic_increasing assert not c.is_monotonic_decreasing - c = CategoricalIndex([1, 2, 3], ordered=True) + c = CategoricalIndex(data, ordered=True) assert c.is_monotonic_increasing assert not c.is_monotonic_decreasing - c = CategoricalIndex([1, 2, 3], categories=[3, 2, 1]) + c = CategoricalIndex(data, categories=reversed(data)) assert not c.is_monotonic_increasing assert c.is_monotonic_decreasing - c = CategoricalIndex([1, 3, 2], categories=[3, 2, 1]) + c = CategoricalIndex(data, categories=reversed(data), ordered=True) assert not c.is_monotonic_increasing - assert not c.is_monotonic_decreasing + assert c.is_monotonic_decreasing - c = CategoricalIndex([1, 2, 3], categories=[3, 2, 1], ordered=True) + # test when data is neither monotonic increasing nor decreasing + reordered_data = [data[0], data[2], data[1]] + c = CategoricalIndex(reordered_data, categories=reversed(data)) assert not c.is_monotonic_increasing - assert c.is_monotonic_decreasing + assert not c.is_monotonic_decreasing # non lexsorted categories - categories = [9, 0, 1, 2, 3] + categories = non_lexsorted_data - c = CategoricalIndex([9, 0], categories=categories) + c = CategoricalIndex(categories[:2], categories=categories) assert c.is_monotonic_increasing assert not c.is_monotonic_decreasing - c = CategoricalIndex([0, 1], categories=categories) + c = CategoricalIndex(categories[1:3], categories=categories) assert c.is_monotonic_increasing assert not c.is_monotonic_decreasing