Skip to content

Commit 0d031cc

Browse files
authored
feat: JSON encoding of FixedSizeList (#5646)
Added the ability to encode the FixedSizeList type in JSON. The implementation of FixedSizeListEncoder is very similar to that of ListEncoder, but somewhat simpler: because every list has the same length, the start of each list is computed as the row index times the list length instead of being read from an offsets buffer. A test was added to verify the behaviour of the JSON encoder with and without explicit nulls.
1 parent 0124307 commit 0d031cc

File tree

2 files changed

+136
-2
lines changed

2 files changed

+136
-2
lines changed

arrow-json/src/writer.rs

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -834,7 +834,8 @@ mod tests {
834834
use serde_json::json;
835835

836836
use arrow_array::builder::{
837-
FixedSizeBinaryBuilder, Int32Builder, Int64Builder, MapBuilder, StringBuilder,
837+
FixedSizeBinaryBuilder, FixedSizeListBuilder, Int32Builder, Int64Builder, MapBuilder,
838+
StringBuilder,
838839
};
839840
use arrow_buffer::{Buffer, NullBuffer, OffsetBuffer, ToByteSlice};
840841
use arrow_data::ArrayData;
@@ -2215,4 +2216,86 @@ mod tests {
22152216
);
22162217
}
22172218
}
2219+
2220+
/// Writes a nullable `FixedSizeList<Int32, 3>` column as a JSON array of objects and
/// checks the output both with explicit nulls enabled and with the default writer.
#[test]
fn test_writer_fixed_size_list() {
    // Schema: one nullable column "list" whose rows each hold `size` nullable Int32 items.
    let size = 3;
    let item_field = FieldRef::new(Field::new("item", DataType::Int32, true));
    let list_field = Field::new("list", DataType::FixedSizeList(item_field, size), true);
    let schema = SchemaRef::new(Schema::new(vec![list_field]));

    // Three valid rows (each containing one null item) followed by one null row.
    let rows = [
        Some([Some(1), Some(2), None]),
        Some([Some(3), None, Some(4)]),
        Some([None, Some(5), Some(6)]),
        None,
    ];
    let mut list_builder = FixedSizeListBuilder::new(Int32Builder::new(), size);
    for row in rows {
        if let Some(items) = row {
            for item in items {
                list_builder.values().append_option(item);
            }
        } else {
            // Pad the child builder with `size` nulls before marking the row itself null.
            for _ in 0..size {
                list_builder.values().append_null();
            }
        }
        list_builder.append(row.is_some());
    }
    let column = Arc::new(list_builder.finish()) as ArrayRef;
    let batch = RecordBatch::try_new(schema, vec![column]).unwrap();

    // Encode with explicit nulls: the null row is written as `"list": null`.
    let mut explicit_buf = Vec::new();
    let mut explicit_writer = WriterBuilder::new()
        .with_explicit_nulls(true)
        .build::<_, JsonArray>(&mut explicit_buf);
    explicit_writer.write(&batch).unwrap();
    explicit_writer.close().unwrap();
    let explicit_json: Value = serde_json::from_slice(&explicit_buf).unwrap();
    assert_eq!(
        json!([
            {"list": [1, 2, null]},
            {"list": [3, null, 4]},
            {"list": [null, 5, 6]},
            {"list": null},
        ]),
        explicit_json
    );

    // Encode with the default writer: the null row's key is left out entirely.
    let mut default_buf = Vec::new();
    let mut default_writer = ArrayWriter::new(&mut default_buf);
    default_writer.write(&batch).unwrap();
    default_writer.close().unwrap();
    let default_json: Value = serde_json::from_slice(&default_buf).unwrap();
    assert_eq!(
        json!([
            {"list": [1, 2, null]},
            {"list": [3, null, 4]},
            {"list": [null, 5, 6]},
            {}, // empty because nulls are omitted
        ]),
        default_json
    );
}
22182301
}

arrow-json/src/writer/encoder.rs

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ fn make_encoder_impl<'a>(
8888
let array = array.as_list::<i64>();
8989
(Box::new(ListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
9090
}
91+
DataType::FixedSizeList(_, _) => {
92+
let array = array.as_fixed_size_list();
93+
(Box::new(FixedSizeListEncoder::try_new(array, options)?) as _, array.nulls().cloned())
94+
}
9195

9296
DataType::Dictionary(_, _) => downcast_dictionary_array! {
9397
array => (Box::new(DictionaryEncoder::try_new(array, options)?) as _, array.logical_nulls()),
@@ -100,7 +104,7 @@ fn make_encoder_impl<'a>(
100104
}
101105

102106
DataType::FixedSizeBinary(_) => {
103-
let array = array.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
107+
let array = array.as_fixed_size_binary();
104108
(Box::new(FixedSizeBinaryEncoder::new(array)) as _, array.nulls().cloned())
105109
}
106110

@@ -329,6 +333,53 @@ impl<'a, O: OffsetSizeTrait> Encoder for ListEncoder<'a, O> {
329333
}
330334
}
331335

336+
struct FixedSizeListEncoder<'a> {
337+
value_length: usize,
338+
nulls: Option<NullBuffer>,
339+
encoder: Box<dyn Encoder + 'a>,
340+
}
341+
342+
impl<'a> FixedSizeListEncoder<'a> {
343+
fn try_new(
344+
array: &'a FixedSizeListArray,
345+
options: &EncoderOptions,
346+
) -> Result<Self, ArrowError> {
347+
let (encoder, nulls) = make_encoder_impl(array.values().as_ref(), options)?;
348+
Ok(Self {
349+
encoder,
350+
nulls,
351+
value_length: array.value_length().as_usize(),
352+
})
353+
}
354+
}
355+
356+
impl<'a> Encoder for FixedSizeListEncoder<'a> {
    /// Appends row `idx` to `out` as a JSON array.
    ///
    /// The row covers child indices `idx * value_length .. (idx + 1) * value_length`.
    /// A child flagged as null by the null mask is written as `null`; every other
    /// child is delegated to the child encoder. Items are separated by `,`.
    fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
        let first = idx * self.value_length;
        let last = first + self.value_length;

        out.push(b'[');
        for child in first..last {
            // Separator goes before every item except the first one of the row.
            if child != first {
                out.push(b',');
            }
            match &self.nulls {
                Some(mask) if mask.is_null(child) => out.extend_from_slice(b"null"),
                _ => self.encoder.encode(child, out),
            }
        }
        out.push(b']');
    }
}
382+
332383
struct DictionaryEncoder<'a, K: ArrowDictionaryKeyType> {
333384
keys: ScalarBuffer<K::Native>,
334385
encoder: Box<dyn Encoder + 'a>,

0 commit comments

Comments
 (0)