@@ -64,6 +64,10 @@ def construct_array_type(cls):
64
64
"""Return the array type associated with this dtype."""
65
65
return JSONArray
66
66
67
def __from_arrow__(self, array: pa.Array | pa.ChunkedArray) -> JSONArray:
    """Wrap a pyarrow ``Array`` or ``ChunkedArray`` in a ``JSONArray``."""
    json_array = JSONArray(array)
    return json_array
70
+
67
71
68
72
class JSONArray (arrays .ArrowExtensionArray ):
69
73
"""Extension array that handles BigQuery JSON data, leveraging a string-based
@@ -92,6 +96,10 @@ def __init__(self, values) -> None:
92
96
else :
93
97
raise NotImplementedError (f"Unsupported pandas version: { pd .__version__ } " )
94
98
99
+ def __arrow_array__ (self ):
100
+ """Convert to an arrow array. This is required for pyarrow extension."""
101
+ return self .pa_data
102
+
95
103
@classmethod
96
104
def _box_pa (
97
105
cls , value , pa_type : pa .DataType | None = None
@@ -151,7 +159,12 @@ def _serialize_json(value):
151
159
def _deserialize_json (value ):
152
160
"""A static method that converts a JSON string back into its original value."""
153
161
if not pd .isna (value ):
154
- return json .loads (value )
162
+ # Attempt to interpret the value as a JSON object.
163
+ # If it's not valid JSON, treat it as a regular string.
164
+ try :
165
+ return json .loads (value )
166
+ except json .JSONDecodeError :
167
+ return value
155
168
else :
156
169
return value
157
170
@@ -244,3 +257,39 @@ def __array__(self, dtype=None, copy: bool | None = None) -> np.ndarray:
244
257
result [mask ] = self ._dtype .na_value
245
258
result [~ mask ] = data [~ mask ].pa_data .to_numpy ()
246
259
return result
260
+
261
+
262
class ArrowJSONType(pa.ExtensionType):
    """Arrow extension type for the `dbjson` Pandas extension type.

    The type takes no parameters: it is constructed with ``pa.string()`` as
    its storage type and ``"dbjson"`` as its extension name.
    """

    def __init__(self) -> None:
        super().__init__(pa.string(), "dbjson")

    def __arrow_ext_serialize__(self) -> bytes:
        """Return the serialized type parameters; none are necessary."""
        return b""

    def __eq__(self, other):
        # Use the same ``pa`` alias as the class header so this method does
        # not depend on a separate ``pyarrow`` name being bound in the module.
        if isinstance(other, pa.BaseExtensionType):
            # Exact-class identity: two extension types are equal only when
            # they are the very same Python class.
            return type(self) is type(other)
        return NotImplemented

    def __ne__(self, other) -> bool:
        return not self == other

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized) -> ArrowJSONType:
        """Return a fresh instance; both arguments are unused."""
        return ArrowJSONType()

    def __hash__(self) -> int:
        # Defined alongside ``__eq__`` so instances remain hashable.
        return hash(str(self))

    def to_pandas_dtype(self):
        """Return the pandas dtype counterpart, a ``JSONDtype`` instance."""
        return JSONDtype()
291
+
292
+
293
# Register an instance of the extension type with pyarrow when this module is
# imported, so the type can be included in RecordBatches, sent over IPC, and
# received in another Python process.
pa.register_extension_type(ArrowJSONType())
0 commit comments