Skip to content

Commit 7c79ebc

Browse files
committed
remove need for array types
1 parent a661c0b commit 7c79ebc

File tree

11 files changed

+195
-126
lines changed

11 files changed

+195
-126
lines changed

pandas/core/arrays/__init__.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
from .base import ExtensionArray # noqa
22
from .categorical import Categorical # noqa
33
from .integer import ( # noqa
4-
Int8Array, Int16Array, Int32Array, Int64Array,
5-
UInt8Array, UInt16Array, UInt32Array, UInt64Array,
6-
to_integer_array)
4+
IntegerArray, to_integer_array)

pandas/core/arrays/base.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -87,14 +87,16 @@ class ExtensionArray(object):
8787
# Constructors
8888
# ------------------------------------------------------------------------
8989
@classmethod
90-
def _from_sequence(cls, scalars, copy=False):
90+
def _from_sequence(cls, scalars, dtype=None, copy=False):
9191
"""Construct a new ExtensionArray from a sequence of scalars.
9292
9393
Parameters
9494
----------
9595
scalars : Sequence
9696
Each element will be an instance of the scalar type for this
9797
array, ``cls.dtype.type``.
98+
dtype : Dtype, optional
99+
construct for this particular dtype
98100
copy : boolean, default False
99101
if True, copy the underlying data
100102
Returns
@@ -377,7 +379,7 @@ def fillna(self, value=None, method=None, limit=None):
377379
func = pad_1d if method == 'pad' else backfill_1d
378380
new_values = func(self.astype(object), limit=limit,
379381
mask=mask)
380-
new_values = self._from_sequence(new_values)
382+
new_values = self._from_sequence(new_values, dtype=self.dtype)
381383
else:
382384
# fill with value
383385
new_values = self.copy()
@@ -406,7 +408,7 @@ def unique(self):
406408
from pandas import unique
407409

408410
uniques = unique(self.astype(object))
409-
return self._from_sequence(uniques)
411+
return self._from_sequence(uniques, dtype=self.dtype)
410412

411413
def _values_for_factorize(self):
412414
# type: () -> Tuple[ndarray, Any]
@@ -558,7 +560,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
558560
559561
result = take(data, indices, fill_value=fill_value,
560562
allow_fill=allow_fill)
561-
return self._from_sequence(result)
563+
return self._from_sequence(result, dtype=self.dtype)
562564
"""
563565
# Implementer note: The `fill_value` parameter should be a user-facing
564566
# value, an instance of self.dtype.type. When passed `fill_value=None`,

pandas/core/arrays/categorical.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,8 @@ def _constructor(self):
436436
return Categorical
437437

438438
@classmethod
439-
def _from_sequence(cls, scalars):
440-
return Categorical(scalars)
439+
def _from_sequence(cls, scalars, dtype=None, copy=False):
440+
return Categorical(scalars, dtype=dtype)
441441

442442
def copy(self):
443443
""" Copy constructor. """

pandas/core/arrays/integer.py

+124-75
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
import warnings
33
import numpy as np
44

5+
from pandas.compat import u
56
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
7+
from pandas.util._decorators import cache_readonly
68
from pandas.compat import set_function_name
79
from pandas.api.types import (is_integer, is_scalar, is_float,
810
is_float_dtype, is_integer_dtype,
@@ -12,21 +14,44 @@
1214
from pandas.core.dtypes.base import ExtensionDtype
1315
from pandas.core.dtypes.dtypes import registry
1416
from pandas.core.dtypes.missing import isna, notna
15-
16-
# available dtypes
17-
_integer_dtypes = ['int8', 'int16', 'int32', 'int64']
18-
_integer_formatter = lambda x: x.capitalize()
19-
_unsigned_dtypes = ['uint8', 'uint16', 'uint32', 'uint64']
20-
_unsigned_formatter = lambda x: "{}{}".format(x[0].upper(), x[1:].capitalize())
17+
from pandas.io.formats.printing import (
18+
format_object_summary, format_object_attrs, default_pprint)
2119

2220

2321
class IntegerDtype(ExtensionDtype):
2422
type = None
2523
na_value = np.nan
26-
kind = 'i'
27-
is_integer = True
28-
is_signed_integer = True
29-
is_unsigned_integer = False
24+
25+
@cache_readonly
26+
def is_signed_integer(self):
27+
return self.kind == 'i'
28+
29+
@cache_readonly
30+
def is_unsigned_integer(self):
31+
return self.kind == 'u'
32+
33+
@cache_readonly
34+
def numpy_dtype(self):
35+
""" Return an instance of our numpy dtype """
36+
return np.dtype(self.type)
37+
38+
@cache_readonly
39+
def kind(self):
40+
return self.numpy_dtype.kind
41+
42+
@classmethod
43+
def construct_array_type(cls, array):
44+
"""Return the array type associated with this dtype
45+
46+
Parameters
47+
----------
48+
array : value array
49+
50+
Returns
51+
-------
52+
type
53+
"""
54+
return IntegerArray
3055

3156
@classmethod
3257
def construct_from_string(cls, string):
@@ -40,12 +65,6 @@ def construct_from_string(cls, string):
4065
"'{}'".format(cls, string))
4166

4267

43-
class UnsignedIntegerDtype(IntegerDtype):
44-
kind = 'u'
45-
is_signed_integer = False
46-
is_unsigned_integer = True
47-
48-
4968
def to_integer_array(values):
5069
"""
5170
Parameters
@@ -61,13 +80,14 @@ def to_integer_array(values):
6180
TypeError if incompatible types
6281
"""
6382
values = np.array(values, copy=False)
64-
kind = 'UInt' if values.dtype.kind == 'u' else 'Int'
65-
array_type = "{}{}Array".format(kind, values.dtype.itemsize * 8)
6683
try:
67-
array_type = getattr(module, array_type)
68-
except AttributeError:
84+
dtype = _dtypes[str(values.dtype)]
85+
except KeyError:
86+
if is_float_dtype(values):
87+
return IntegerArray(values)
88+
6989
raise TypeError("Incompatible dtype for {}".format(values.dtype))
70-
return array_type(values, copy=False)
90+
return IntegerArray(values, dtype=dtype, copy=False)
7191

7292

7393
def coerce_to_array(values, dtype, mask=None, copy=False):
@@ -86,6 +106,14 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
86106
-------
87107
tuple of (values, mask)
88108
"""
109+
110+
if isinstance(values, IntegerArray):
111+
values, mask = values.data, values.mask
112+
if copy:
113+
values = values.copy()
114+
mask = mask.copy()
115+
return values, mask
116+
89117
values = np.array(values, copy=copy)
90118
if is_object_dtype(values):
91119
inferred_type = infer_dtype(values)
@@ -112,14 +140,23 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
112140
if is_object_dtype(values):
113141
mask |= isna(values)
114142

143+
# infer dtype if needed
144+
if dtype is None:
145+
if is_integer_dtype(values):
146+
dtype = values.dtype
147+
else:
148+
dtype = np.dtype('int64')
149+
else:
150+
dtype = dtype.type
151+
115152
# we copy as need to coerce here
116153
if mask.any():
117154
values = values.copy()
118155
values[mask] = 1
119156

120-
values = values.astype(dtype.type)
157+
values = values.astype(dtype)
121158
else:
122-
values = values.astype(dtype.type, copy=False)
159+
values = values.astype(dtype, copy=False)
123160

124161
return values, mask
125162

@@ -131,26 +168,30 @@ class IntegerArray(ExtensionArray):
131168
- mask: a boolean array holding a mask on the data, True is missing
132169
"""
133170

134-
dtype = None
171+
@cache_readonly
172+
def dtype(self):
173+
return _dtypes[str(self.data.dtype)]
135174

136-
def __init__(self, values, mask=None, copy=False):
175+
def __init__(self, values, mask=None, dtype=None, copy=False):
137176
self.data, self.mask = coerce_to_array(
138-
values, dtype=self.dtype, mask=mask, copy=copy)
177+
values, dtype=dtype, mask=mask, copy=copy)
139178

140179
@classmethod
141-
def _from_sequence(cls, scalars, mask=None, copy=False):
142-
return cls(scalars, mask=mask, copy=copy)
180+
def _from_sequence(cls, scalars, mask=None, dtype=None, copy=False):
181+
return cls(scalars, mask=mask, dtype=dtype, copy=copy)
143182

144183
@classmethod
145184
def _from_factorized(cls, values, original):
146-
return cls(values)
185+
return cls(values, dtype=original.dtype)
147186

148187
def __getitem__(self, item):
149188
if is_integer(item):
150189
if self.mask[item]:
151190
return self.dtype.na_value
152191
return self.data[item]
153-
return type(self)(self.data[item], mask=self.mask[item])
192+
return type(self)(self.data[item],
193+
mask=self.mask[item],
194+
dtype=self.dtype)
154195

155196
def _coerce_to_ndarray(self):
156197
""" coerce to an ndarray, preserving my scalar types """
@@ -205,12 +246,12 @@ def take(self, indexer, allow_fill=False, fill_value=None):
205246
result[fill_mask] = fill_value
206247
mask = mask ^ fill_mask
207248

208-
return self._from_sequence(result, mask=mask)
249+
return type(self)(result, mask=mask, dtype=self.dtype)
209250

210251
def copy(self, deep=False):
211252
if deep:
212253
return type(self)(
213-
self.data.copy(), mask=self.mask.copy())
254+
self.data.copy(), mask=self.mask.copy(), dtype=self.dtype)
214255
return type(self)(self)
215256

216257
def __setitem__(self, key, value):
@@ -230,11 +271,23 @@ def __len__(self):
230271
return len(self.data)
231272

232273
def __repr__(self):
274+
"""
275+
Return a string representation for this object.
276+
277+
Invoked by unicode(df) in py2 only. Yields a Unicode String in both
278+
py2/py3.
279+
"""
280+
klass = self.__class__.__name__
281+
data = format_object_summary(self, default_pprint, False)
282+
attrs = format_object_attrs(self)
283+
space = " "
233284

234-
formatted = self._formatting_values()
235-
return '{}({})'.format(
236-
self.__class__.__name__,
237-
formatted.tolist())
285+
prepr = (u(",%s") %
286+
space).join(u("%s=%s") % (k, v) for k, v in attrs)
287+
288+
res = u("%s(%s%s)") % (klass, data, prepr)
289+
290+
return res
238291

239292
@property
240293
def nbytes(self):
@@ -251,7 +304,7 @@ def _na_value(self):
251304
def _concat_same_type(cls, to_concat):
252305
data = np.concatenate([x.data for x in to_concat])
253306
mask = np.concatenate([x.mask for x in to_concat])
254-
return cls(data, mask=mask)
307+
return cls(data, mask=mask, dtype=to_concat[0].dtype)
255308

256309
def astype(self, dtype, copy=True):
257310
"""Cast to a NumPy array with 'dtype'.
@@ -269,7 +322,22 @@ def astype(self, dtype, copy=True):
269322
-------
270323
array : ndarray
271324
NumPy ndarray with 'dtype' for its dtype.
325+
326+
Raises
327+
------
328+
TypeError
329+
if incompatible type with an IntegerDtype, equivalent of same_kind
330+
casting
272331
"""
332+
333+
# if we are astyping to an existing IntegerDtype we can fastpath
334+
if isinstance(dtype, IntegerDtype):
335+
result = self.data.astype(dtype.numpy_dtype,
336+
casting='same_kind', copy=False)
337+
return type(self)(result, mask=self.mask,
338+
dtype=dtype, copy=False)
339+
340+
# coerce
273341
data = self._coerce_to_ndarray()
274342
return data.astype(dtype=dtype, copy=False)
275343

@@ -412,56 +480,37 @@ def integer_arithmetic_method(self, other):
412480
if is_float_dtype(result):
413481
mask |= (result == np.inf) | (result == -np.inf)
414482

415-
return cls(result, mask=mask)
483+
return cls(result, mask=mask, dtype=self.dtype, copy=False)
416484

417485
name = '__{name}__'.format(name=op.__name__)
418486
return set_function_name(integer_arithmetic_method, name, cls)
419487

420488

421-
class UnsignedIntegerArray(IntegerArray):
422-
pass
489+
IntegerArray._add_numeric_methods_binary()
490+
IntegerArray._add_comparison_methods_binary()
423491

424492

425493
module = sys.modules[__name__]
426494

427495

428496
# create the Dtype
429-
types = [(_integer_dtypes, IntegerDtype, _integer_formatter),
430-
(_unsigned_dtypes, UnsignedIntegerDtype, _unsigned_formatter)]
431-
for dtypes, superclass, formatter in types:
497+
_dtypes = {}
498+
for dtype in ['int8', 'int16', 'int32', 'int64',
499+
'uint8', 'uint16', 'uint32', 'uint64']:
432500

433-
for dtype in dtypes:
434-
435-
name = formatter(dtype)
436-
classname = "{}Dtype".format(name)
437-
attributes_dict = {'type': getattr(np, dtype),
438-
'name': name}
439-
dtype_type = type(classname, (superclass, ), attributes_dict)
440-
setattr(module, classname, dtype_type)
441-
442-
# register
443-
registry.register(dtype_type)
444-
445-
446-
# create the Array
447-
types = [(_integer_dtypes, IntegerArray, _integer_formatter),
448-
(_unsigned_dtypes, UnsignedIntegerArray, _unsigned_formatter)]
449-
for dtypes, superclass, formatter in types:
450-
451-
for dtype in dtypes:
452-
453-
dtype_type = getattr(module, "{}Dtype".format(formatter(dtype)))
454-
classname = "{}Array".format(formatter(dtype))
455-
attributes_dict = {'dtype': dtype_type()}
456-
array_type = type(classname, (superclass, ), attributes_dict)
457-
setattr(module, classname, array_type)
458-
459-
# add ops
460-
array_type._add_numeric_methods_binary()
461-
array_type._add_comparison_methods_binary()
462-
463-
# set the Array type on the Dtype
464-
dtype_type.array_type = array_type
501+
if dtype.startswith('u'):
502+
name = "U{}".format(dtype[1:].capitalize())
503+
else:
504+
name = dtype.capitalize()
505+
classname = "{}Dtype".format(name)
506+
attributes_dict = {'type': getattr(np, dtype),
507+
'name': name}
508+
dtype_type = type(classname, (IntegerDtype, ), attributes_dict)
509+
setattr(module, classname, dtype_type)
510+
511+
# register
512+
registry.register(dtype_type)
513+
_dtypes[dtype] = dtype_type()
465514

466515

467516
def make_data():

0 commit comments

Comments
 (0)