Skip to content
This repository was archived by the owner on Apr 10, 2024. It is now read-only.

Commit 29df124

Browse files
Joshua Storck authored and wesm committed
[pandas 2.0] Re-factor of the DataType classes
* Changing PrimitiveType to NumericType
* Removing the macro for declaring sub-types of NumericType and instead using template arguments
* Adding a static SINGLETON member to the NumericType base class instead of the macros for methods for creating singletons for each numeric type
* Removing NullType's inheritance of PrimitiveType
* Removing intermediate base class between {Integer,Floating}ArrayImpl and PrimitiveType and just making a concrete template based class {Integer,Floating}Array
* Changing FLOAT and DOUBLE to FLOAT32 and FLOAT64

Author: Joshua Storck <[email protected]>

Closes #55 from joshuastorck/pandas-2.0 and squashes the following commits:

1614d44 [Joshua Storck]
* Fixing native.p{xd,yx} so that it matches the latest C++ code
* Changing PrimitiveType to NumericType
* Removing the macro for declaring sub-types of NumericType and instead using template arguments
* Adding a static SINGLETON member to the NumericType base class instead of the macros for methods for creating singletons for each numeric type
* Removing NullType's inheritance of PrimitiveType
* Removing intermediate base class between {Integer,Floating}ArrayImpl and PrimitiveType and just making a concrete template based class {Integer,Floating}Array
* Changing FLOAT and DOUBLE to FLOAT32 and FLOAT64
1 parent 2d4c8f9 commit 29df124

File tree

10 files changed: +209 -203 lines changed

pandas/native.pxd

+23-21
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ cdef extern from "<iostream>":
2727
pass
2828

2929

30-
cdef extern from "pandas/status.h" namespace "pandas" nogil:
30+
cdef extern from "pandas/common.h" namespace "pandas" nogil:
3131

3232
# We can later add more of the common status factory methods as needed
3333
cdef Status Status_OK "Status::OK"()
@@ -44,28 +44,30 @@ cdef extern from "pandas/status.h" namespace "pandas" nogil:
4444
c_bool IsUnknownError()
4545
c_bool IsNotImplemented()
4646

47-
cdef extern from "pandas/api.h" namespace "pandas":
47+
cdef extern from "pandas/api.h" namespace "pandas::DataType":
4848

4949
enum TypeId:
50-
TypeId_NA " pandas::DataType::NA"
51-
TypeId_UINT8 " pandas::DataType::UINT8"
52-
TypeId_UINT16 " pandas::DataType::UINT16"
53-
TypeId_UINT32 " pandas::DataType::UINT32"
54-
TypeId_UINT64 " pandas::DataType::UINT64"
55-
TypeId_INT8 " pandas::DataType::INT8"
56-
TypeId_INT16 " pandas::DataType::INT16"
57-
TypeId_INT32 " pandas::DataType::INT32"
58-
TypeId_INT64 " pandas::DataType::INT64"
59-
TypeId_BOOL " pandas::DataType::BOOL"
60-
TypeId_FLOAT " pandas::DataType::FLOAT"
61-
TypeId_DOUBLE " pandas::DataType::DOUBLE"
62-
TypeId_PYOBJECT " pandas::DataType::PYOBJECT"
63-
TypeId_CATEGORY " pandas::DataType::CATEGORY"
64-
TypeId_TIMESTAMP " pandas::DataType::TIMESTAMP"
65-
TypeId_TIMESTAMP_TZ " pandas::DataType::TIMESTAMP_TZ"
50+
TypeId_NA " pandas::DataType::TypeId::NA"
51+
TypeId_UINT8 " pandas::DataType::TypeId::UINT8"
52+
TypeId_UINT16 " pandas::DataType::TypeId::UINT16"
53+
TypeId_UINT32 " pandas::DataType::TypeId::UINT32"
54+
TypeId_UINT64 " pandas::DataType::TypeId::UINT64"
55+
TypeId_INT8 " pandas::DataType::TypeId::INT8"
56+
TypeId_INT16 " pandas::DataType::TypeId::INT16"
57+
TypeId_INT32 " pandas::DataType::TypeId::INT32"
58+
TypeId_INT64 " pandas::DataType::TypeId::INT64"
59+
TypeId_BOOL " pandas::DataType::TypeId::BOOL"
60+
TypeId_FLOAT32 " pandas::DataType::TypeId::FLOAT32"
61+
TypeId_FLOAT64 " pandas::DataType::TypeId::FLOAT64"
62+
TypeId_PYOBJECT " pandas::DataType::TypeId::PYOBJECT"
63+
TypeId_CATEGORY " pandas::DataType::TypeId::CATEGORY"
64+
TypeId_TIMESTAMP " pandas::DataType::TypeId::TIMESTAMP"
65+
TypeId_TIMESTAMP_TZ " pandas::DataType::TypeId::TIMESTAMP_TZ"
66+
67+
cdef extern from "pandas/api.h" namespace "pandas":
6668

6769
cdef cppclass DataType:
68-
TypeId type
70+
TypeId type()
6971

7072
DataType()
7173

@@ -116,8 +118,8 @@ cdef extern from "pandas/api.h" namespace "pandas":
116118
TypeId type_id()
117119
size_t length()
118120

119-
object GetValue(size_t i)
120-
void SetValue(size_t i, object val)
121+
object GetItem(size_t i)
122+
void SetItem(size_t i, object val)
121123

122124
cdef cppclass CCategoryArray" pandas::CategoryArray"(CArray):
123125
pass

pandas/native.pyx

+5-5
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ cdef class Array:
209209
cdef inline _getitem(self, size_t i):
210210
if i >= self.ap.length():
211211
raise IndexError('Out of bounds: %d' % i)
212-
return self.ap.GetValue(i)
212+
return self.ap.GetItem(i)
213213

214214
def __setitem__(self, i, val):
215215
cdef:
@@ -226,7 +226,7 @@ cdef class Array:
226226
cdef inline _setitem(self, size_t i, object val):
227227
if i >= self.ap.length():
228228
raise IndexError('Out of bounds: %d' % i)
229-
self.ap.SetValue(i, val)
229+
self.ap.SetItem(i, val)
230230

231231
def slice(self, start, end):
232232
pass
@@ -251,7 +251,7 @@ cdef class Float32Array(FloatingArray):
251251

252252
cdef class BooleanArray(Array):
253253
cdef:
254-
lp.cBooleanArray* inst
254+
lp.CBooleanArray* inst
255255

256256
cdef init(self, const ArrayPtr& arr):
257257
Array.init(self, arr)
@@ -265,7 +265,7 @@ cdef Array wrap_array(const lp.ArrayPtr& arr):
265265
cdef:
266266
Array result
267267

268-
if arr.get().type_enum() == lp.TypeId_CATEGORY:
268+
if arr.get().type_id() == lp.TypeId_CATEGORY:
269269
result = CategoryArray()
270270
else:
271271
result = Array()
@@ -280,7 +280,7 @@ cdef PandasType wrap_type(const lp.TypePtr& sp_type):
280280
lp.DataType* type = sp_type.get()
281281
PandasType result
282282

283-
if type.type == lp.TypeId_CATEGORY:
283+
if type.type() == lp.TypeId_CATEGORY:
284284
result = Category()
285285
else:
286286
result = PandasType()

src/pandas/array-test.cc

+1-1
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ class TestArray : public ::testing::Test {
4040
TEST_F(TestArray, Attrs) {
4141
DoubleType ex_type;
4242
ASSERT_TRUE(array_->type()->Equals(ex_type));
43-
ASSERT_EQ(DataType::DOUBLE, array_->type_id());
43+
ASSERT_EQ(DataType::FLOAT64, array_->type_id());
4444

4545
ASSERT_EQ(values_.size(), array_->length());
4646
}

src/pandas/array.h

+2
Original file line number | Diff line number | Diff line change
@@ -92,4 +92,6 @@ class ArrayView {
9292
int64_t length_;
9393
};
9494

95+
using ArrayPtr = std::shared_ptr<Array>;
96+
9597
} // namespace pandas

src/pandas/dispatch.cc

+2-2
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,8 @@ Status primitive_type_from_enum(DataType::TypeId tp_enum, DataType** out) {
2222
MAKE_TYPE_CASE(DataType::UINT16, UInt16);
2323
MAKE_TYPE_CASE(DataType::UINT32, UInt32);
2424
MAKE_TYPE_CASE(DataType::UINT64, UInt64);
25-
MAKE_TYPE_CASE(DataType::FLOAT, Float);
26-
MAKE_TYPE_CASE(DataType::DOUBLE, Double);
25+
MAKE_TYPE_CASE(DataType::FLOAT32, Float);
26+
MAKE_TYPE_CASE(DataType::FLOAT64, Double);
2727
MAKE_TYPE_CASE(DataType::BOOL, Boolean);
2828
MAKE_TYPE_CASE(DataType::PYOBJECT, PyObject);
2929
default:

src/pandas/numpy_interop.cc

+2-2
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,8 @@ Status numpy_type_num_to_pandas(int type_num, DataType::TypeId* pandas_type) {
5050
TYPE_MAP_CASE(UINT16, UINT16);
5151
TYPE_MAP_CASE(UINT32, UINT32);
5252
TYPE_MAP_CASE(UINT64, UINT64);
53-
TYPE_MAP_CASE(FLOAT32, FLOAT);
54-
TYPE_MAP_CASE(FLOAT64, DOUBLE);
53+
TYPE_MAP_CASE(FLOAT32, FLOAT32);
54+
TYPE_MAP_CASE(FLOAT64, FLOAT64);
5555
TYPE_MAP_CASE(BOOL, BOOL);
5656
TYPE_MAP_CASE(OBJECT, PYOBJECT);
5757
default:

src/pandas/type.cc

+13
Original file line number | Diff line number | Diff line change
@@ -28,4 +28,17 @@ std::string TimestampType::ToString() const {
2828
return ss.str();
2929
}
3030

31+
// Constexpr numeric type names
32+
constexpr const char* UInt8Type::NAME;
33+
constexpr const char* Int8Type::NAME;
34+
constexpr const char* UInt16Type::NAME;
35+
constexpr const char* Int16Type::NAME;
36+
constexpr const char* UInt32Type::NAME;
37+
constexpr const char* Int32Type::NAME;
38+
constexpr const char* UInt64Type::NAME;
39+
constexpr const char* Int64Type::NAME;
40+
constexpr const char* FloatType::NAME;
41+
constexpr const char* DoubleType::NAME;
42+
constexpr const char* BooleanType::NAME;
43+
3144
} // namespace pandas

src/pandas/type.h

+65-41
Original file line number | Diff line number | Diff line change
@@ -32,10 +32,10 @@ class DataType {
3232
BOOL = 9,
3333

3434
// 4-byte floating point value
35-
FLOAT = 10,
35+
FLOAT32 = 10,
3636

3737
// 8-byte floating point value
38-
DOUBLE = 11,
38+
FLOAT64 = 11,
3939

4040
// PyObject*
4141
PYOBJECT = 12,
@@ -94,72 +94,96 @@ class PANDAS_EXPORT PyObjectType : public DataType {
9494
std::string ToString() const override;
9595
};
9696

97-
template <typename Derived>
98-
class PANDAS_EXPORT PrimitiveType : public DataType {
97+
template <typename DERIVED, typename C_TYPE, DataType::TypeId TYPE_ID,
98+
std::size_t SIZE = sizeof(C_TYPE)>
99+
class PANDAS_EXPORT NumericType : public DataType {
99100
public:
100-
PrimitiveType() : DataType(Derived::type_enum) {}
101+
using c_type = C_TYPE;
102+
static constexpr DataType::TypeId type_id = TYPE_ID;
103+
static constexpr size_t size = SIZE;
101104

102-
std::string ToString() const override {
103-
return std::string(static_cast<const Derived*>(this)->name());
104-
}
105+
NumericType() : DataType(type_id) {}
106+
107+
std::string ToString() const override { return std::string(DERIVED::NAME); }
108+
109+
static std::shared_ptr<DERIVED> SINGLETON;
105110
};
106111

107-
#define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \
108-
public: \
109-
typedef C_TYPE c_type; \
110-
static constexpr DataType::TypeId type_enum = DataType::ENUM; \
111-
static constexpr size_t size = SIZE; \
112-
\
113-
explicit TYPENAME() : PrimitiveType<TYPENAME>() {} \
114-
\
115-
static const char* name() { return NAME; }
112+
template <typename DERIVED, typename C_TYPE, DataType::TypeId TYPE_ID, std::size_t SIZE>
113+
std::shared_ptr<DERIVED> NumericType<DERIVED, C_TYPE, TYPE_ID, SIZE>::SINGLETON(
114+
std::move(std::make_shared<DERIVED>()));
115+
116+
class PANDAS_EXPORT NullType : public DataType {
117+
public:
118+
NullType() : DataType(DataType::TypeId::NA) {}
116119

117-
class PANDAS_EXPORT NullType : public PrimitiveType<NullType> {
118-
PRIMITIVE_DECL(NullType, void, NA, 0, "null");
120+
std::string ToString() const override { return std::string("null"); }
119121
};
120122

121-
class PANDAS_EXPORT UInt8Type : public PrimitiveType<UInt8Type> {
122-
PRIMITIVE_DECL(UInt8Type, uint8_t, UINT8, 1, "uint8");
123+
class PANDAS_EXPORT UInt8Type
124+
: public NumericType<UInt8Type, std::uint8_t, DataType::TypeId::UINT8> {
125+
public:
126+
constexpr static const char* NAME = "uint8";
123127
};
124128

125-
class PANDAS_EXPORT Int8Type : public PrimitiveType<Int8Type> {
126-
PRIMITIVE_DECL(Int8Type, int8_t, INT8, 1, "int8");
129+
class PANDAS_EXPORT Int8Type
130+
: public NumericType<Int8Type, std::int8_t, DataType::TypeId::INT8> {
131+
public:
132+
constexpr static const char* NAME = "int8";
127133
};
128134

129-
class PANDAS_EXPORT UInt16Type : public PrimitiveType<UInt16Type> {
130-
PRIMITIVE_DECL(UInt16Type, uint16_t, UINT16, 2, "uint16");
135+
class PANDAS_EXPORT UInt16Type
136+
: public NumericType<UInt16Type, std::uint16_t, DataType::TypeId::UINT16> {
137+
public:
138+
constexpr static const char* NAME = "uint16";
131139
};
132140

133-
class PANDAS_EXPORT Int16Type : public PrimitiveType<Int16Type> {
134-
PRIMITIVE_DECL(Int16Type, int16_t, INT16, 2, "int16");
141+
class PANDAS_EXPORT Int16Type
142+
: public NumericType<Int16Type, std::int16_t, DataType::TypeId::INT16> {
143+
public:
144+
constexpr static const char* NAME = "int16";
135145
};
136146

137-
class PANDAS_EXPORT UInt32Type : public PrimitiveType<UInt32Type> {
138-
PRIMITIVE_DECL(UInt32Type, uint32_t, UINT32, 4, "uint32");
147+
class PANDAS_EXPORT UInt32Type
148+
: public NumericType<UInt32Type, std::uint32_t, DataType::TypeId::UINT32> {
149+
public:
150+
constexpr static const char* NAME = "uint32";
139151
};
140152

141-
class PANDAS_EXPORT Int32Type : public PrimitiveType<Int32Type> {
142-
PRIMITIVE_DECL(Int32Type, int32_t, INT32, 4, "int32");
153+
class PANDAS_EXPORT Int32Type
154+
: public NumericType<Int32Type, std::int32_t, DataType::TypeId::INT32> {
155+
public:
156+
constexpr static const char* NAME = "int32";
143157
};
144158

145-
class PANDAS_EXPORT UInt64Type : public PrimitiveType<UInt64Type> {
146-
PRIMITIVE_DECL(UInt64Type, uint64_t, UINT64, 8, "uint64");
159+
class PANDAS_EXPORT UInt64Type
160+
: public NumericType<UInt64Type, std::uint64_t, DataType::TypeId::UINT64> {
161+
public:
162+
constexpr static const char* NAME = "uint64";
147163
};
148164

149-
class PANDAS_EXPORT Int64Type : public PrimitiveType<Int64Type> {
150-
PRIMITIVE_DECL(Int64Type, int64_t, INT64, 8, "int64");
165+
class PANDAS_EXPORT Int64Type
166+
: public NumericType<Int64Type, std::int64_t, DataType::TypeId::INT64> {
167+
public:
168+
constexpr static const char* NAME = "int64";
151169
};
152170

153-
class PANDAS_EXPORT FloatType : public PrimitiveType<FloatType> {
154-
PRIMITIVE_DECL(FloatType, float, FLOAT, 4, "float");
171+
class PANDAS_EXPORT FloatType
172+
: public NumericType<FloatType, float, DataType::TypeId::FLOAT32> {
173+
public:
174+
constexpr static const char* NAME = "float32";
155175
};
156176

157-
class PANDAS_EXPORT DoubleType : public PrimitiveType<DoubleType> {
158-
PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double");
177+
class PANDAS_EXPORT DoubleType
178+
: public NumericType<DoubleType, double, DataType::TypeId::FLOAT64> {
179+
public:
180+
constexpr static const char* NAME = "float64";
159181
};
160182

161-
class PANDAS_EXPORT BooleanType : public PrimitiveType<BooleanType> {
162-
PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool");
183+
class PANDAS_EXPORT BooleanType
184+
: public NumericType<BooleanType, std::uint8_t, DataType::TypeId::BOOL> {
185+
public:
186+
constexpr static const char* NAME = "bool";
163187
};
164188

165189
} // namespace pandas

0 commit comments

Comments
 (0)