From 5438cf3b2535922317f864d05449b77160303d5c Mon Sep 17 00:00:00 2001 From: Yongkyun Lee Date: Thu, 21 Aug 2025 20:26:22 -0700 Subject: [PATCH 1/3] Support more default types in avro to arrow schema conversion --- arrow-avro/src/schema.rs | 75 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs index 2f1c0a2bcffc..49a8a00759d1 100644 --- a/arrow-avro/src/schema.rs +++ b/arrow-avro/src/schema.rs @@ -215,8 +215,8 @@ pub struct Field<'a> { #[serde(borrow)] pub r#type: Schema<'a>, /// Optional default value for this field - #[serde(borrow, default)] - pub default: Option<&'a str>, + #[serde(default)] + pub default: Option<serde_json::Value>, } /// An enumeration @@ -1767,4 +1767,75 @@ mod tests { let avro = AvroSchema::try_from(&schema).unwrap(); assert_json_contains(&avro.json_string, "\"arrowDurationUnit\":\"second\""); } + + #[test] + fn test_schema_with_non_string_defaults_decodes_successfully() { + let schema_json = r#"{ + "type": "record", + "name": "R", + "fields": [ + {"name": "a", "type": "int", "default": 0}, + {"name": "b", "type": {"type": "array", "items": "long"}, "default": [1, 2, 3]}, + {"name": "c", "type": {"type": "map", "values": "double"}, "default": {"x": 1.5, "y": 2.5}}, + {"name": "inner", "type": {"type": "record", "name": "Inner", "fields": [ + {"name": "flag", "type": "boolean", "default": true}, + {"name": "name", "type": "string", "default": "hi"} + ]}, "default": {"flag": false, "name": "d"}}, + {"name": "u", "type": ["int", "null"], "default": 42} + ] + }"#; + + let schema: Schema = serde_json::from_str(schema_json).expect("schema should parse"); + match &schema { + Schema::Complex(ComplexType::Record(_)) => {} + other => panic!("expected record schema, got: {:?}", other), + } + // Avro->Arrow conversion should succeed + let field = crate::codec::AvroField::try_from(&schema) + .expect("Avro->Arrow conversion should succeed"); + let arrow_field = 
field.field(); + + // Build expected Arrow field precisely + let expected_list_item = ArrowField::new( + arrow_schema::Field::LIST_FIELD_DEFAULT_NAME, + DataType::Int64, + false, + ); + let expected_b = ArrowField::new("b", DataType::List(Arc::new(expected_list_item)), false); + + let expected_map_value = ArrowField::new("value", DataType::Float64, false); + let expected_entries = ArrowField::new( + "entries", + DataType::Struct(Fields::from(vec![ + ArrowField::new("key", DataType::Utf8, false), + expected_map_value, + ])), + false, + ); + let expected_c = + ArrowField::new("c", DataType::Map(Arc::new(expected_entries), false), false); + + let expected_inner = ArrowField::new( + "inner", + DataType::Struct(Fields::from(vec![ + ArrowField::new("flag", DataType::Boolean, false), + ArrowField::new("name", DataType::Utf8, false), + ])), + false, + ); + + let expected = ArrowField::new( + "R", + DataType::Struct(Fields::from(vec![ + ArrowField::new("a", DataType::Int32, false), + expected_b, + expected_c, + expected_inner, + ArrowField::new("u", DataType::Int32, true), + ])), + false, + ); + + assert_eq!(arrow_field, expected); + } } From 95e7af4515c5a9857a5e048b5ec129ebf9ecb9a0 Mon Sep 17 00:00:00 2001 From: Yongkyun Lee Date: Sat, 23 Aug 2025 14:49:39 -0700 Subject: [PATCH 2/3] Make comment more concise --- arrow-avro/src/schema.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs index 49a8a00759d1..76e8801ae15e 100644 --- a/arrow-avro/src/schema.rs +++ b/arrow-avro/src/schema.rs @@ -1790,12 +1790,12 @@ mod tests { Schema::Complex(ComplexType::Record(_)) => {} other => panic!("expected record schema, got: {:?}", other), } - // Avro->Arrow conversion should succeed + // Avro to Arrow conversion let field = crate::codec::AvroField::try_from(&schema) .expect("Avro->Arrow conversion should succeed"); let arrow_field = field.field(); - // Build expected Arrow field precisely + // Build expected Arrow 
field let expected_list_item = ArrowField::new( arrow_schema::Field::LIST_FIELD_DEFAULT_NAME, DataType::Int64, From c03f9d64903ba4cd121060704de0e5b879364b73 Mon Sep 17 00:00:00 2001 From: Yongkyun Lee Date: Sat, 23 Aug 2025 21:07:53 -0700 Subject: [PATCH 3/3] Address comment to use Value without serde_json --- arrow-avro/src/schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs index 76e8801ae15e..a631119466bd 100644 --- a/arrow-avro/src/schema.rs +++ b/arrow-avro/src/schema.rs @@ -216,7 +216,7 @@ pub struct Field<'a> { pub r#type: Schema<'a>, /// Optional default value for this field #[serde(default)] - pub default: Option<serde_json::Value>, + pub default: Option<Value>, } /// An enumeration