-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
[WIP]API: CategoricalType for specifying categoricals #14698
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
""" define extension dtypes """ | ||
|
||
import re | ||
import reprlib | ||
import numpy as np | ||
from pandas import compat | ||
|
||
|
@@ -98,25 +99,73 @@ class CategoricalDtypeType(type): | |
class CategoricalDtype(ExtensionDtype): | ||
|
||
""" | ||
Type for categorical data with the categories and orderedness, | ||
but not the values | ||
|
||
.. versionadded:: 0.20.0 | ||
|
||
Parameters | ||
---------- | ||
categories : list or None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. list-like; use something similar to what's in Categorical now |
||
ordered : bool, default False | ||
|
||
Examples | ||
-------- | ||
>>> t = CategoricalDtype(categories=['b', 'a'], ordered=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These examples are not relevant. This should not include Series; this is a self-contained type. |
||
>>> s = Series(['a', 'a', 'b', 'b', 'a']) | ||
>>> s.astype(t) | ||
0 a | ||
1 a | ||
2 b | ||
3 b | ||
4 a | ||
dtype: category | ||
Categories (2, object): [b < a] | ||
|
||
Notes | ||
----- | ||
An instance of ``CategoricalDtype`` compares equal with any other | ||
instance of ``CategoricalDtype``, regardless of categories or ordered. | ||
In addition they compare equal to the string ``'category'``. | ||
To check whether two instances of a ``CategoricalDtype`` match, | ||
use the ``is`` operator. | ||
|
||
>>> t1 = CategoricalDtype(['a', 'b'], ordered=True) | ||
>>> t2 = CategoricalDtype(['a', 'c'], ordered=False) | ||
>>> t1 == t2 | ||
True | ||
>>> t1 == 'category' | ||
True | ||
>>> t1 is t2 | ||
False | ||
>>> t1 is CategoricalDtype(['a', 'b'], ordered=True) | ||
True | ||
|
||
A np.dtype duck-typed class, suitable for holding a custom categorical | ||
dtype. | ||
|
||
THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.object | ||
""" | ||
# TODO: Document public vs. private API | ||
name = 'category' | ||
type = CategoricalDtypeType | ||
kind = 'O' | ||
str = '|O08' | ||
base = np.dtype('O') | ||
_cache = {} | ||
|
||
def __new__(cls): | ||
def __new__(cls, categories=None, ordered=False): | ||
categories_ = categories if categories is None else tuple(categories) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This needs all of the validation logic (from Categorical) applied to the actual categories. |
||
t = (categories_, ordered) | ||
|
||
try: | ||
return cls._cache[cls.name] | ||
return cls._cache[t] | ||
except KeyError: | ||
c = object.__new__(cls) | ||
cls._cache[cls.name] = c | ||
c.categories = categories | ||
c.ordered = ordered | ||
|
||
cls._cache[t] = c | ||
return c | ||
|
||
def __hash__(self): | ||
|
@@ -129,6 +178,15 @@ def __eq__(self, other): | |
|
||
return isinstance(other, CategoricalDtype) | ||
|
||
# def __unicode__(self): | ||
# tpl = 'CategoricalDtype({!r}, ordered={})' | ||
# return tpl.format(reprlib.repr(self.categories), self.ordered) | ||
|
||
# def __repr__(self): | ||
# """ return the base repr for the categories """ | ||
# tpl = 'CategoricalDtype({!r}, ordered={})' | ||
# return tpl.format(reprlib.repr(self.categories), self.ordered) | ||
|
||
@classmethod | ||
def construct_from_string(cls, string): | ||
""" attempt to construct this type from a string, raise a TypeError if | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This has been around for quite some time; you are adding parameter support in 0.20.0.