diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 2b15cdeb08..263317738e 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -603,8 +603,12 @@ def _(update: SetCurrentSchemaUpdate, base_metadata: TableMetadata, context: _Ta if schema is None: raise ValueError(f"Schema with id {new_schema_id} does not exist") + metadata_updates: Dict[str, Any] = {"current_schema_id": new_schema_id} + if base_metadata.format_version == 1: + metadata_updates["schema_"] = schema + context.add_update(update) - return base_metadata.model_copy(update={"current_schema_id": new_schema_id}) + return base_metadata.model_copy(update=metadata_updates) @_apply_table_update.register(AddPartitionSpecUpdate) @@ -631,17 +635,17 @@ def _(update: SetDefaultSpecUpdate, base_metadata: TableMetadata, context: _Tabl new_spec_id = max(spec.spec_id for spec in base_metadata.partition_specs) if new_spec_id == base_metadata.default_spec_id: return base_metadata - found_spec_id = False - for spec in base_metadata.partition_specs: - found_spec_id = spec.spec_id == new_spec_id - if found_spec_id: - break - if not found_spec_id: + spec = base_metadata.specs().get(new_spec_id) + if spec is None: raise ValueError(f"Failed to find spec with id {new_spec_id}") + metadata_updates: Dict[str, Any] = {"default_spec_id": new_spec_id} + if base_metadata.format_version == 1: + metadata_updates["partition_spec"] = [field.model_dump() for field in spec.fields] + context.add_update(update) - return base_metadata.model_copy(update={"default_spec_id": new_spec_id}) + return base_metadata.model_copy(update=metadata_updates) @_apply_table_update.register(AddSnapshotUpdate) diff --git a/tests/conftest.py b/tests/conftest.py index 205cd2203f..4750296e0b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -532,6 +532,7 @@ def all_avro_types() -> Dict[str, Any]: "last-column-id": 3, "schema": { "type": "struct", + "identifier-field-ids": [1, 2], "fields": [ {"id": 1, "name": "x", "required": True, "type": "long"}, {"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"}, diff --git a/tests/integration/test_partition_evolution.py b/tests/integration/test_partition_evolution.py index 85ae32374d..7f5eacc3cb 100644 --- a/tests/integration/test_partition_evolution.py +++ b/tests/integration/test_partition_evolution.py @@ -92,7 +92,7 @@ def _table_v2(catalog: Catalog) -> Table: def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: str) -> Table: - tbl_name = "default.test_schema_evolution" + tbl_name = "default.test_partition_evolution" try: catalog.drop_table(tbl_name) except NoSuchTableError: @@ -488,3 +488,5 @@ def _validate_new_partition_fields( assert len(spec.fields) == len(expected_partition_fields) for i in range(len(spec.fields)): assert spec.fields[i] == expected_partition_fields[i] + if table.format_version == 1: + assert table.metadata.partition_spec == [field.model_dump() for field in spec.fields] diff --git a/tests/table/test_init.py b/tests/table/test_init.py index bb212d696e..6e59ff47d2 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -607,15 +607,16 @@ def test_apply_add_schema_update(table_v2: Table) -> None: assert test_context.is_added_schema(2) -def test_update_metadata_table_schema(table_v2: Table) -> None: - transaction = table_v2.transaction() +@pytest.mark.parametrize("table", [pytest.lazy_fixture("table_v1"), pytest.lazy_fixture("table_v2")]) +def test_update_metadata_table_schema(table: Table) -> None: + base_current_schema_id = table.metadata.current_schema_id + transaction = table.transaction() update = transaction.update_schema() update.add_column(path="b", field_type=IntegerType()) update.commit() - new_metadata = update_table_metadata(table_v2.metadata, transaction._updates) # pylint: disable=W0212 - apply_schema: Schema = next(schema for schema in new_metadata.schemas if schema.schema_id == 2) + new_metadata = update_table_metadata(table.metadata, transaction._updates) # pylint: disable=W0212 + apply_schema: Schema = new_metadata.schema() assert len(apply_schema.fields) == 4 - assert apply_schema == Schema( NestedField(field_id=1, name="x", field_type=LongType(), required=True), NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), @@ -623,10 +624,13 @@ def test_update_metadata_table_schema(table_v2: Table) -> None: NestedField(field_id=4, name="b", field_type=IntegerType(), required=False), identifier_field_ids=[1, 2], ) - assert apply_schema.schema_id == 2 + assert apply_schema.schema_id == base_current_schema_id + 1 assert apply_schema.highest_field_id == 4 + assert new_metadata.current_schema_id == base_current_schema_id + 1 + assert apply_schema == new_metadata.schema() - assert new_metadata.current_schema_id == 2 + if table.metadata.format_version == 1: + assert new_metadata.schema_ == apply_schema def test_update_metadata_add_snapshot(table_v2: Table) -> None: diff --git a/tests/table/test_metadata.py b/tests/table/test_metadata.py index b4e30a6b84..bd2db85f19 100644 --- a/tests/table/test_metadata.py +++ b/tests/table/test_metadata.py @@ -119,7 +119,7 @@ def test_v1_metadata_parsing_directly(example_table_metadata_v1: Dict[str, Any]) NestedField(field_id=1, name="x", field_type=LongType(), required=True), NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"), NestedField(field_id=3, name="z", field_type=LongType(), required=True), - identifier_field_ids=[], + identifier_field_ids=[1, 2], ) ] assert table_metadata.schemas[0].schema_id == 0 @@ -168,7 +168,7 @@ def test_updating_metadata(example_table_metadata_v2: Dict[str, Any]) -> None: def test_serialize_v1(example_table_metadata_v1: Dict[str, Any]) -> None: table_metadata = TableMetadataV1(**example_table_metadata_v1) table_metadata_json = table_metadata.model_dump_json() - expected = """{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},"partition-spec":[{"name":"x","transform":"identity","source-id":1,"field-id":1000}]}""" + expected = """{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[1,2]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}],"default-spec-id":0,"last-partition-id":1000,"properties":{},"snapshots":[{"snapshot-id":1925,"timestamp-ms":1602638573822}],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":1,"schema":{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[1,2]},"partition-spec":[{"name":"x","transform":"identity","source-id":1,"field-id":1000}]}""" assert table_metadata_json == expected