2
2
import warnings
3
3
import numpy as np
4
4
5
+ from pandas .compat import u
5
6
from pandas .core .dtypes .generic import ABCSeries , ABCIndexClass
7
+ from pandas .util ._decorators import cache_readonly
6
8
from pandas .compat import set_function_name
7
9
from pandas .api .types import (is_integer , is_scalar , is_float ,
8
10
is_float_dtype , is_integer_dtype ,
12
14
from pandas .core .dtypes .base import ExtensionDtype
13
15
from pandas .core .dtypes .dtypes import registry
14
16
from pandas .core .dtypes .missing import isna , notna
15
-
16
- # available dtypes
17
- _integer_dtypes = ['int8' , 'int16' , 'int32' , 'int64' ]
18
- _integer_formatter = lambda x : x .capitalize ()
19
- _unsigned_dtypes = ['uint8' , 'uint16' , 'uint32' , 'uint64' ]
20
- _unsigned_formatter = lambda x : "{}{}" .format (x [0 ].upper (), x [1 :].capitalize ())
17
+ from pandas .io .formats .printing import (
18
+ format_object_summary , format_object_attrs , default_pprint )
21
19
22
20
23
21
class IntegerDtype (ExtensionDtype ):
24
22
type = None
25
23
na_value = np .nan
26
- kind = 'i'
27
- is_integer = True
28
- is_signed_integer = True
29
- is_unsigned_integer = False
24
+
25
@cache_readonly
def is_signed_integer(self):
    """Return True when this dtype's ``kind`` code is ``'i'``."""
    signed = self.kind == 'i'
    return signed
28
+
29
@cache_readonly
def is_unsigned_integer(self):
    """Return True when this dtype's ``kind`` code is ``'u'``."""
    unsigned = self.kind == 'u'
    return unsigned
32
+
33
@cache_readonly
def numpy_dtype(self):
    """Return the ``numpy.dtype`` instance built from this dtype's ``type``."""
    scalar_type = self.type
    return np.dtype(scalar_type)
37
+
38
@cache_readonly
def kind(self):
    """Return the kind code reported by ``numpy_dtype``."""
    backing = self.numpy_dtype
    return backing.kind
41
+
42
@classmethod
def construct_array_type(cls, array=None):
    """Return the array type associated with this dtype

    Parameters
    ----------
    array : value array, optional
        Accepted for interface compatibility only; it is not inspected,
        so it may be omitted.

    Returns
    -------
    type
        The ``IntegerArray`` class.
    """
    return IntegerArray
30
55
31
56
@classmethod
32
57
def construct_from_string (cls , string ):
@@ -40,12 +65,6 @@ def construct_from_string(cls, string):
40
65
"'{}'" .format (cls , string ))
41
66
42
67
43
- class UnsignedIntegerDtype (IntegerDtype ):
44
- kind = 'u'
45
- is_signed_integer = False
46
- is_unsigned_integer = True
47
-
48
-
49
68
def to_integer_array(values):
    """
    Wrap values in an IntegerArray, picking the dtype from the data.

    Parameters
    ----------
    values : array-like
        Data whose numpy dtype name is a key of ``_dtypes`` is wrapped
        with the matching dtype instance. Float data is handed to
        ``IntegerArray`` without an explicit dtype.

    Returns
    -------
    IntegerArray

    Raises
    ------
    TypeError if incompatible types
    """
    # asarray copies only when needed; np.array(..., copy=False) raises on
    # NumPy >= 2.0 whenever a copy cannot be avoided (e.g. list input)
    values = np.asarray(values)
    try:
        dtype = _dtypes[str(values.dtype)]
    except KeyError:
        # not one of the registered integer dtypes: floats are still
        # accepted and left to IntegerArray to coerce
        if is_float_dtype(values):
            return IntegerArray(values)

        raise TypeError("Incompatible dtype for {}".format(values.dtype))
    return IntegerArray(values, dtype=dtype, copy=False)
71
91
72
92
73
93
def coerce_to_array (values , dtype , mask = None , copy = False ):
@@ -86,6 +106,14 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
86
106
-------
87
107
tuple of (values, mask)
88
108
"""
109
+
110
+ if isinstance (values , IntegerArray ):
111
+ values , mask = values .data , values .mask
112
+ if copy :
113
+ values = values .copy ()
114
+ mask = mask .copy ()
115
+ return values , mask
116
+
89
117
values = np .array (values , copy = copy )
90
118
if is_object_dtype (values ):
91
119
inferred_type = infer_dtype (values )
@@ -112,14 +140,23 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
112
140
if is_object_dtype (values ):
113
141
mask |= isna (values )
114
142
143
+ # infer dtype if needed
144
+ if dtype is None :
145
+ if is_integer_dtype (values ):
146
+ dtype = values .dtype
147
+ else :
148
+ dtype = np .dtype ('int64' )
149
+ else :
150
+ dtype = dtype .type
151
+
115
152
# we copy as need to coerce here
116
153
if mask .any ():
117
154
values = values .copy ()
118
155
values [mask ] = 1
119
156
120
- values = values .astype (dtype . type )
157
+ values = values .astype (dtype )
121
158
else :
122
- values = values .astype (dtype . type , copy = False )
159
+ values = values .astype (dtype , copy = False )
123
160
124
161
return values , mask
125
162
@@ -131,26 +168,30 @@ class IntegerArray(ExtensionArray):
131
168
- mask: a boolean array holding a mask on the data, True is missing
132
169
"""
133
170
134
- dtype = None
171
@cache_readonly
def dtype(self):
    """Return the ``_dtypes`` entry keyed by the name of the data's numpy dtype."""
    key = str(self.data.dtype)
    return _dtypes[key]
135
174
136
def __init__(self, values, mask=None, dtype=None, copy=False):
    """
    Store the ``(data, mask)`` pair produced by ``coerce_to_array``.

    Parameters
    ----------
    values : passed through to ``coerce_to_array``
    mask : passed through to ``coerce_to_array``, default None
    dtype : passed through to ``coerce_to_array``, default None
    copy : boolean, default False
        passed through to ``coerce_to_array``
    """
    coerced = coerce_to_array(values, dtype=dtype, mask=mask, copy=copy)
    self.data, self.mask = coerced
139
178
140
179
@classmethod
def _from_sequence(cls, scalars, mask=None, dtype=None, copy=False):
    """Build a new instance by forwarding every argument to the constructor."""
    options = dict(mask=mask, dtype=dtype, copy=copy)
    return cls(scalars, **options)
143
182
144
183
@classmethod
def _from_factorized(cls, values, original):
    """Build a new instance from ``values`` using the dtype of ``original``."""
    source_dtype = original.dtype
    return cls(values, dtype=source_dtype)
147
186
148
187
def __getitem__(self, item):
    """
    Return one element or a new array of the same type.

    A non-integer ``item`` indexes both data and mask and wraps the
    result in a new instance with this array's dtype. An integer
    ``item`` returns the dtype's ``na_value`` when the mask is set at
    that position, otherwise the stored value.
    """
    if not is_integer(item):
        return type(self)(self.data[item],
                          mask=self.mask[item],
                          dtype=self.dtype)

    if self.mask[item]:
        return self.dtype.na_value
    return self.data[item]
154
195
155
196
def _coerce_to_ndarray (self ):
156
197
""" coerce to an ndarray, preserving my scalar types """
@@ -205,12 +246,12 @@ def take(self, indexer, allow_fill=False, fill_value=None):
205
246
result [fill_mask ] = fill_value
206
247
mask = mask ^ fill_mask
207
248
208
- return self . _from_sequence (result , mask = mask )
249
+ return type ( self ) (result , mask = mask , dtype = self . dtype )
209
250
210
251
def copy(self, deep=False):
    """
    Return a new array of the same type.

    With ``deep=True`` the data and mask are copied first and this
    array's dtype is passed along. Otherwise the new instance is
    constructed directly from ``self``.
    """
    klass = type(self)
    if not deep:
        return klass(self)
    return klass(self.data.copy(), mask=self.mask.copy(), dtype=self.dtype)
215
256
216
257
def __setitem__ (self , key , value ):
@@ -230,11 +271,23 @@ def __len__(self):
230
271
return len (self .data )
231
272
232
273
def __repr__(self):
    """
    Return a string representation for this object.

    The result has the form ``ClassName(<summary><attrs>)``: the summary
    comes from ``format_object_summary`` and the attributes from
    ``format_object_attrs``, rendered as ``key=value`` pairs joined by
    ``", "``.
    """
    summary = format_object_summary(self, default_pprint, False)
    separator = u(",%s") % " "
    attr_text = separator.join(
        u("%s=%s") % (key, val) for key, val in format_object_attrs(self))

    return u("%s(%s%s)") % (self.__class__.__name__, summary, attr_text)
238
291
239
292
@property
240
293
def nbytes (self ):
@@ -251,7 +304,7 @@ def _na_value(self):
251
304
def _concat_same_type (cls , to_concat ):
252
305
data = np .concatenate ([x .data for x in to_concat ])
253
306
mask = np .concatenate ([x .mask for x in to_concat ])
254
- return cls (data , mask = mask )
307
+ return cls (data , mask = mask , dtype = to_concat [ 0 ]. dtype )
255
308
256
309
def astype(self, dtype, copy=True):
    """Cast to a NumPy array or IntegerArray with 'dtype'.

    Parameters
    ----------
    dtype : str, numpy dtype or IntegerDtype
        Typecode or data-type to which the array is cast.
    copy : bool, default True
        Honored when 'dtype' is an IntegerDtype: if True the result owns
        fresh data and mask arrays, if False they may be shared with
        this array. For any other 'dtype' the data goes through
        ``_coerce_to_ndarray`` and is cast with ``copy=False``.

    Returns
    -------
    array : ndarray or IntegerArray
        IntegerArray when 'dtype' is an IntegerDtype, otherwise a
        NumPy ndarray with 'dtype' for its dtype.

    Raises
    ------
    TypeError
        if incompatible type with an IntegerDtype, equivalent of same_kind
        casting
    """

    # if we are astyping to an existing IntegerDtype we can fastpath
    if isinstance(dtype, IntegerDtype):
        result = self.data.astype(dtype.numpy_dtype,
                                  casting='same_kind', copy=copy)
        # the mask must follow the same ownership rule as the data,
        # otherwise writes to the "copy" would leak into this array
        mask = self.mask.copy() if copy else self.mask
        return type(self)(result, mask=mask,
                          dtype=dtype, copy=False)

    # coerce
    data = self._coerce_to_ndarray()
    return data.astype(dtype=dtype, copy=False)
275
343
@@ -412,56 +480,37 @@ def integer_arithmetic_method(self, other):
412
480
if is_float_dtype (result ):
413
481
mask |= (result == np .inf ) | (result == - np .inf )
414
482
415
- return cls (result , mask = mask )
483
+ return cls (result , mask = mask , dtype = self . dtype , copy = False )
416
484
417
485
name = '__{name}__' .format (name = op .__name__ )
418
486
return set_function_name (integer_arithmetic_method , name , cls )
419
487
420
488
421
- class UnsignedIntegerArray ( IntegerArray ):
422
- pass
489
# attach the binary numeric and comparison operators to the array class
IntegerArray._add_numeric_methods_binary()
IntegerArray._add_comparison_methods_binary()


module = sys.modules[__name__]


# create the Dtype
# _dtypes maps a numpy dtype name (e.g. 'int8') to an instance of the
# dtype class generated for it below
_dtypes = {}
for dtype in ('int8', 'int16', 'int32', 'int64',
              'uint8', 'uint16', 'uint32', 'uint64'):

    # 'int8' -> 'Int8', 'uint8' -> 'UInt8'
    name = ("U{}".format(dtype[1:].capitalize())
            if dtype.startswith('u') else dtype.capitalize())
    classname = "{}Dtype".format(name)
    attributes_dict = dict(type=getattr(np, dtype), name=name)
    dtype_type = type(classname, (IntegerDtype, ), attributes_dict)

    # expose the generated class as a module attribute
    setattr(module, classname, dtype_type)

    # register
    registry.register(dtype_type)
    _dtypes[dtype] = dtype_type()
465
514
466
515
467
516
def make_data ():
0 commit comments