Skip to content

Commit b2da9c7

Browse files
committed
Fix IntervalDtype Bugs and Inconsistencies
1 parent 4d571bb commit b2da9c7

File tree

3 files changed

+89
-56
lines changed

3 files changed

+89
-56
lines changed

doc/source/whatsnew/v0.23.0.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ Other API Changes
205205
- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`)
206206
- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`)
207207
- :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`)
208+
- ``IntervalDtype`` now compares equal to the string ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)
208209

209210
.. _whatsnew_0230.deprecations:
210211

@@ -294,6 +295,7 @@ Conversion
294295
- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`)
295296
- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`)
296297
- Bug in :class:`DatetimeIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`)
298+
- Bug in ``IntervalDtype`` when constructing two instances with subtype ``CategoricalDtype`` where the second instance used cached attributes from the first (:issue:`18980`)
297299

298300

299301
Indexing

pandas/core/dtypes/dtypes.py

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,7 @@ class IntervalDtype(ExtensionDtype):
626626
627627
THIS IS NOT A REAL NUMPY DTYPE
628628
"""
629+
name = 'interval'
629630
type = IntervalDtypeType
630631
kind = None
631632
str = '|O08'
@@ -651,8 +652,8 @@ def __new__(cls, subtype=None):
651652
u.subtype = None
652653
return u
653654
elif (isinstance(subtype, compat.string_types) and
654-
subtype == 'interval'):
655-
subtype = ''
655+
subtype in ('interval', 'interval[]')):
656+
subtype = None
656657
else:
657658
if isinstance(subtype, compat.string_types):
658659
m = cls._match.search(subtype)
@@ -665,17 +666,16 @@ def __new__(cls, subtype=None):
665666
except TypeError:
666667
raise ValueError("could not construct IntervalDtype")
667668

668-
if subtype is None:
669-
u = object.__new__(cls)
670-
u.subtype = None
671-
return u
672-
673669
try:
674-
return cls._cache[str(subtype)]
670+
# GH 18980: need to combine since str and hash individually may not
671+
# be unique, e.g. str(CategoricalDtype) always returns 'category',
672+
# and hash(np.dtype('<m8')) == hash(np.dtype('<m8[ns]'))
673+
key = ''.join([str(subtype), str(hash(subtype))])
674+
return cls._cache[key]
675675
except KeyError:
676676
u = object.__new__(cls)
677677
u.subtype = subtype
678-
cls._cache[str(subtype)] = u
678+
cls._cache[key] = u
679679
return u
680680

681681
@classmethod
@@ -685,31 +685,29 @@ def construct_from_string(cls, string):
685685
if it's not possible
686686
"""
687687
if isinstance(string, compat.string_types):
688-
try:
689-
return cls(string)
690-
except ValueError:
691-
pass
692-
raise TypeError("could not construct IntervalDtype")
688+
return cls(string)
689+
msg = "a string needs to be passed, got type {typ}"
690+
raise TypeError(msg.format(typ=type(string)))
693691

694692
def __unicode__(self):
695693
if self.subtype is None:
696694
return "interval"
697695
return "interval[{subtype}]".format(subtype=self.subtype)
698696

699-
@property
700-
def name(self):
701-
return str(self)
702-
703697
def __hash__(self):
704698
# make myself hashable
705699
return hash(str(self))
706700

707701
def __eq__(self, other):
708702
if isinstance(other, compat.string_types):
709-
return other == self.name or other == self.name.title()
710-
711-
return (isinstance(other, IntervalDtype) and
712-
self.subtype == other.subtype)
703+
return other.title() in (self.name.title(), str(self).title())
704+
elif not isinstance(other, IntervalDtype):
705+
return False
706+
elif self.subtype is None or other.subtype is None:
707+
# None should match any subtype
708+
return True
709+
else:
710+
return self.subtype == other.subtype
713711

714712
@classmethod
715713
def is_dtype(cls, dtype):

pandas/tests/dtypes/test_dtypes.py

Lines changed: 67 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ def test_hash_vs_equality(self):
433433
assert dtype2 == dtype
434434
assert dtype3 == dtype
435435
assert dtype is dtype2
436-
assert dtype2 is dtype
436+
assert dtype2 is dtype3
437437
assert dtype3 is dtype
438438
assert hash(dtype) == hash(dtype2)
439439
assert hash(dtype) == hash(dtype3)
@@ -451,26 +451,41 @@ def test_hash_vs_equality(self):
451451
assert hash(dtype2) == hash(dtype2)
452452
assert hash(dtype2) == hash(dtype3)
453453

454-
def test_construction(self):
455-
with pytest.raises(ValueError):
456-
IntervalDtype('xx')
457-
458-
for s in ['interval[int64]', 'Interval[int64]', 'int64']:
459-
i = IntervalDtype(s)
460-
assert i.subtype == np.dtype('int64')
461-
assert is_interval_dtype(i)
462-
463-
def test_construction_generic(self):
464-
# generic
465-
i = IntervalDtype('interval')
466-
assert i.subtype == ''
454+
@pytest.mark.parametrize('subtype', [
455+
'interval[int64]', 'Interval[int64]', 'int64', np.dtype('int64')])
456+
def test_construction(self, subtype):
457+
i = IntervalDtype(subtype)
458+
assert i.subtype == np.dtype('int64')
467459
assert is_interval_dtype(i)
468-
assert str(i) == 'interval[]'
469460

470-
i = IntervalDtype()
461+
@pytest.mark.parametrize('subtype', [None, 'interval', 'interval[]'])
462+
def test_construction_generic(self, subtype):
463+
# generic
464+
i = IntervalDtype(subtype)
471465
assert i.subtype is None
472466
assert is_interval_dtype(i)
473-
assert str(i) == 'interval'
467+
468+
def test_construction_errors(self):
469+
msg = 'could not construct IntervalDtype'
470+
with tm.assert_raises_regex(ValueError, msg):
471+
IntervalDtype('xx')
472+
473+
def test_construction_from_string(self):
474+
result = IntervalDtype('interval[int64]')
475+
assert is_dtype_equal(self.dtype, result)
476+
result = IntervalDtype.construct_from_string('interval[int64]')
477+
assert is_dtype_equal(self.dtype, result)
478+
479+
@pytest.mark.parametrize('string', [
480+
'foo', 'interval[foo]', 'foo[int64]', 0, 3.14, ('a', 'b'), None])
481+
def test_construction_from_string_errors(self, string):
482+
if isinstance(string, string_types):
483+
error, msg = ValueError, 'could not construct IntervalDtype'
484+
else:
485+
error, msg = TypeError, 'a string needs to be passed, got type'
486+
487+
with tm.assert_raises_regex(error, msg):
488+
IntervalDtype.construct_from_string(string)
474489

475490
def test_subclass(self):
476491
a = IntervalDtype('interval[int64]')
@@ -495,36 +510,45 @@ def test_is_dtype(self):
495510
assert not IntervalDtype.is_dtype(np.int64)
496511
assert not IntervalDtype.is_dtype(np.float64)
497512

498-
def test_identity(self):
499-
assert (IntervalDtype('interval[int64]') ==
500-
IntervalDtype('interval[int64]'))
501-
502513
def test_coerce_to_dtype(self):
503514
assert (_coerce_to_dtype('interval[int64]') ==
504515
IntervalDtype('interval[int64]'))
505516

506-
def test_construction_from_string(self):
507-
result = IntervalDtype('interval[int64]')
508-
assert is_dtype_equal(self.dtype, result)
509-
result = IntervalDtype.construct_from_string('interval[int64]')
510-
assert is_dtype_equal(self.dtype, result)
511-
with pytest.raises(TypeError):
512-
IntervalDtype.construct_from_string('foo')
513-
with pytest.raises(TypeError):
514-
IntervalDtype.construct_from_string('interval[foo]')
515-
with pytest.raises(TypeError):
516-
IntervalDtype.construct_from_string('foo[int64]')
517-
518517
def test_equality(self):
519518
assert is_dtype_equal(self.dtype, 'interval[int64]')
520519
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
521-
assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
522520
assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64'))
523521

524522
assert not is_dtype_equal(self.dtype, 'int64')
525523
assert not is_dtype_equal(IntervalDtype('int64'),
526524
IntervalDtype('float64'))
527525

526+
@pytest.mark.parametrize('subtype', [
527+
None, 'interval', 'interval[]', 'int64', 'uint64', 'float64', object,
528+
CategoricalDtype(), 'datetime64', 'timedelta64', PeriodDtype('Q')])
529+
def test_equality_generic(self, subtype):
530+
# GH 18980
531+
dtype = IntervalDtype(subtype)
532+
assert is_dtype_equal(dtype, 'interval')
533+
assert is_dtype_equal(dtype, IntervalDtype())
534+
535+
@pytest.mark.parametrize('subtype', [
536+
'int64', 'uint64', 'float64', 'complex128', np.dtype('O'),
537+
CategoricalDtype(), 'datetime64', 'timedelta64', PeriodDtype('Q')])
538+
def test_name_repr(self, subtype):
539+
# GH 18980
540+
dtype = IntervalDtype(subtype)
541+
expected = 'interval[{subtype}]'.format(subtype=subtype)
542+
assert str(dtype) == expected
543+
assert dtype.name == 'interval'
544+
545+
@pytest.mark.parametrize('subtype', [None, 'interval', 'interval[]'])
546+
def test_name_repr_generic(self, subtype):
547+
# GH 18980
548+
dtype = IntervalDtype(subtype)
549+
assert str(dtype) == 'interval'
550+
assert dtype.name == 'interval'
551+
528552
def test_basic(self):
529553
assert is_interval_dtype(self.dtype)
530554

@@ -565,6 +589,15 @@ def test_caching(self):
565589
tm.round_trip_pickle(dtype)
566590
assert len(IntervalDtype._cache) == 0
567591

592+
def test_caching_categoricaldtype(self):
593+
# GH 18980
594+
cdt1 = CategoricalDtype(list('abc'), True)
595+
cdt2 = CategoricalDtype(list('wxyz'), False)
596+
idt1 = IntervalDtype(cdt1)
597+
idt2 = IntervalDtype(cdt2)
598+
assert idt1.subtype is cdt1
599+
assert idt2.subtype is cdt2
600+
568601

569602
class TestCategoricalDtypeParametrized(object):
570603

0 commit comments

Comments
 (0)