From 6f841db37d1b8a2fa35f8e43b476905937d09199 Mon Sep 17 00:00:00 2001
From: Fokko
Date: Tue, 11 Feb 2025 14:27:42 +0100
Subject: [PATCH] Support reading initial-defaults

---
 pyiceberg/io/pyarrow.py  |  7 +++++--
 tests/io/test_pyarrow.py | 11 +++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 78be8f4b50..c6ff407234 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -1768,9 +1768,12 @@ def struct(
                 array = self._cast_if_needed(field, field_array)
                 field_arrays.append(array)
                 fields.append(self._construct_field(field, array.type))
-            elif field.optional:
+            elif field.optional or field.initial_default is not None:
                 arrow_type = schema_to_pyarrow(field.field_type, include_field_ids=False)
-                field_arrays.append(pa.nulls(len(struct_array), type=arrow_type))
+                if field.initial_default is None:
+                    field_arrays.append(pa.nulls(len(struct_array), type=arrow_type))
+                else:
+                    field_arrays.append(pa.repeat(field.initial_default, len(struct_array)))
                 fields.append(self._construct_field(field, arrow_type))
             else:
                 raise ResolveError(f"Field is required, and could not be found in the file: {field}")
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index e2be7872a9..ae81f61467 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -2203,6 +2203,17 @@ def test_identity_partition_on_multi_columns() -> None:
     ) == arrow_table.sort_by([("born_year", "ascending"), ("n_legs", "ascending"), ("animal", "ascending")])
 
 
+def test_initial_value() -> None:
+    # Have some fake data, otherwise it will generate a table without records
+    data = pa.record_batch([pa.nulls(10, pa.int32())], ["some_field"])
+    result = _to_requested_schema(
+        Schema(NestedField(1, "so-true", BooleanType(), required=True, initial_default=True)), Schema(), data
+    )
+    assert result.column_names == ["so-true"]
+    for val in result[0]:
+        assert val.as_py() is True
+
+
 def test__to_requested_schema_timestamps(
     arrow_table_schema_with_all_timestamp_precisions: pa.Schema,
     arrow_table_with_all_timestamp_precisions: pa.Table,