diff --git a/pyiceberg/avro/resolver.py b/pyiceberg/avro/resolver.py
index 9ed111ff40..c4ec393513 100644
--- a/pyiceberg/avro/resolver.py
+++ b/pyiceberg/avro/resolver.py
@@ -290,7 +290,7 @@ def struct(self, file_schema: StructType, record_struct: Optional[IcebergType],
             # There is a default value
             if file_field.write_default is not None:
                 # The field is not in the record, but there is a write default value
-                results.append((None, DefaultWriter(writer=writer, value=file_field.write_default)))  # type: ignore
+                results.append((None, DefaultWriter(writer=writer, value=file_field.write_default)))
             elif file_field.required:
                 raise ValueError(f"Field is required, and there is no write default: {file_field}")
             else:
diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
index fe208b4aca..7bf7b462e2 100644
--- a/pyiceberg/conversions.py
+++ b/pyiceberg/conversions.py
@@ -503,27 +503,47 @@ def _(_: Union[IntegerType, LongType], val: int) -> int:
 
 
 @from_json.register(DateType)
-def _(_: DateType, val: str) -> date:
+def _(_: DateType, val: Union[str, int, date]) -> date:
     """JSON date is string encoded."""
-    return days_to_date(date_str_to_days(val))
+    if isinstance(val, str):
+        val = date_str_to_days(val)
+    if isinstance(val, int):
+        return days_to_date(val)
+    else:
+        return val
 
 
 @from_json.register(TimeType)
-def _(_: TimeType, val: str) -> time:
+def _(_: TimeType, val: Union[str, int, time]) -> time:
     """JSON ISO8601 string into Python time."""
-    return micros_to_time(time_str_to_micros(val))
+    if isinstance(val, str):
+        val = time_str_to_micros(val)
+    if isinstance(val, int):
+        return micros_to_time(val)
+    else:
+        return val
 
 
 @from_json.register(TimestampType)
-def _(_: PrimitiveType, val: str) -> datetime:
+def _(_: PrimitiveType, val: Union[str, int, datetime]) -> datetime:
     """JSON ISO8601 string into Python datetime."""
-    return micros_to_timestamp(timestamp_to_micros(val))
+    if isinstance(val, str):
+        val = timestamp_to_micros(val)
+    if isinstance(val, int):
+        return micros_to_timestamp(val)
+    else:
+        return val
 
 
 @from_json.register(TimestamptzType)
-def _(_: TimestamptzType, val: str) -> datetime:
+def _(_: TimestamptzType, val: Union[str, int, datetime]) -> datetime:
     """JSON ISO8601 string into Python datetime."""
-    return micros_to_timestamptz(timestamptz_to_micros(val))
+    if isinstance(val, str):
+        val = timestamptz_to_micros(val)
+    if isinstance(val, int):
+        return micros_to_timestamptz(val)
+    else:
+        return val
 
 
 @from_json.register(FloatType)
@@ -540,20 +560,24 @@ def _(_: StringType, val: str) -> str:
 
 
 @from_json.register(FixedType)
-def _(t: FixedType, val: str) -> bytes:
+def _(t: FixedType, val: Union[str, bytes]) -> bytes:
     """JSON hexadecimal encoded string into bytes."""
-    b = codecs.decode(val.encode(UTF8), "hex")
+    if isinstance(val, str):
+        val = codecs.decode(val.encode(UTF8), "hex")
 
-    if len(t) != len(b):
-        raise ValueError(f"FixedType has length {len(t)}, which is different from the value: {len(b)}")
+    if len(t) != len(val):
+        raise ValueError(f"FixedType has length {len(t)}, which is different from the value: {len(val)}")
 
-    return b
+    return val
 
 
 @from_json.register(BinaryType)
-def _(_: BinaryType, val: str) -> bytes:
+def _(_: BinaryType, val: Union[bytes, str]) -> bytes:
     """JSON hexadecimal encoded string into bytes."""
-    return codecs.decode(val.encode(UTF8), "hex")
+    if isinstance(val, str):
+        return codecs.decode(val.encode(UTF8), "hex")
+    else:
+        return val
 
 
 @from_json.register(DecimalType)
@@ -563,6 +587,11 @@ def _(_: DecimalType, val: str) -> Decimal:
 
 
 @from_json.register(UUIDType)
-def _(_: UUIDType, val: str) -> uuid.UUID:
+def _(_: UUIDType, val: Union[str, bytes, uuid.UUID]) -> uuid.UUID:
     """Convert JSON string into Python UUID."""
-    return uuid.UUID(val)
+    if isinstance(val, str):
+        return uuid.UUID(val)
+    elif isinstance(val, bytes):
+        return uuid.UUID(bytes=val)
+    else:
+        return val
diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py
index e52ed5e0ab..81e613d55a 100644
--- a/pyiceberg/expressions/literals.py
+++ b/pyiceberg/expressions/literals.py
@@ -23,7 +23,7 @@
 
 import struct
 from abc import ABC, abstractmethod
-from datetime import date, datetime
+from datetime import date, datetime, time
 from decimal import ROUND_HALF_UP, Decimal
 from functools import singledispatchmethod
 from math import isnan
@@ -54,6 +54,7 @@
     datetime_to_micros,
     micros_to_days,
     time_str_to_micros,
+    time_to_micros,
     timestamp_to_micros,
     timestamptz_to_micros,
 )
@@ -152,6 +153,8 @@ def literal(value: L) -> Literal[L]:
         return TimestampLiteral(datetime_to_micros(value))  # type: ignore
     elif isinstance(value, date):
         return DateLiteral(date_to_days(value))  # type: ignore
+    elif isinstance(value, time):
+        return TimeLiteral(time_to_micros(value))  # type: ignore
     else:
         raise TypeError(f"Invalid literal value: {repr(value)}")
 
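Taken together, the `conversions.py` and `literals.py` changes make `from_json` accept values that are already in their Python representation (so defaults survive repeated round trips) and teach `literal()` about `datetime.time`. A minimal sketch of the resulting behavior, assuming a pyiceberg build with this patch applied:

```python
import uuid
from datetime import date, time

from pyiceberg.conversions import from_json
from pyiceberg.expressions import literal
from pyiceberg.types import DateType, UUIDType

# A JSON string, the raw integer encoding, and an already-converted
# Python date all normalize to the same value:
assert from_json(DateType(), "2021-04-12") == date(2021, 4, 12)
assert from_json(DateType(), 18729) == date(2021, 4, 12)  # days since epoch
assert from_json(DateType(), date(2021, 4, 12)) == date(2021, 4, 12)

# UUIDs may arrive as a canonical string, raw bytes, or a UUID instance:
u = uuid.UUID(int=0x12345678123456781234567812345678)
assert from_json(UUIDType(), str(u)) == from_json(UUIDType(), u.bytes) == u

# And time values can now be lifted into literals, which add_column uses
# below to validate default values against the column type:
time_literal = literal(time(19, 25, 22))
```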
diff --git a/pyiceberg/table/update/schema.py b/pyiceberg/table/update/schema.py
index 8ee3b43c24..6ad01e97f2 100644
--- a/pyiceberg/table/update/schema.py
+++ b/pyiceberg/table/update/schema.py
@@ -20,9 +20,10 @@
 from copy import copy
 from dataclasses import dataclass
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
 
 from pyiceberg.exceptions import ResolveError, ValidationError
+from pyiceberg.expressions import literal  # type: ignore
 from pyiceberg.schema import (
     PartnerAccessor,
     Schema,
@@ -47,6 +48,7 @@
     UpdatesAndRequirements,
     UpdateTableMetadata,
 )
+from pyiceberg.typedef import L
 from pyiceberg.types import IcebergType, ListType, MapType, NestedField, PrimitiveType, StructType
 
 if TYPE_CHECKING:
@@ -153,7 +155,12 @@ def union_by_name(self, new_schema: Union[Schema, "pa.Schema"]) -> UpdateSchema:
         return self
 
     def add_column(
-        self, path: Union[str, Tuple[str, ...]], field_type: IcebergType, doc: Optional[str] = None, required: bool = False
+        self,
+        path: Union[str, Tuple[str, ...]],
+        field_type: IcebergType,
+        doc: Optional[str] = None,
+        required: bool = False,
+        default_value: Optional[L] = None,
     ) -> UpdateSchema:
         """Add a new column to a nested struct or Add a new top-level column.
 
@@ -168,6 +175,7 @@ def add_column(
             field_type: Type for the new column.
             doc: Documentation string for the new column.
             required: Whether the new column is required.
+            default_value: Default value for the new column.
 
         Returns:
             This for method chaining.
@@ -177,10 +185,6 @@ def add_column(
                 raise ValueError(f"Cannot add column with ambiguous name: {path}, provide a tuple instead")
             path = (path,)
 
-        if required and not self._allow_incompatible_changes:
-            # Table format version 1 and 2 cannot add required column because there is no initial value
-            raise ValueError(f"Incompatible change: cannot add required column: {'.'.join(path)}")
-
         name = path[-1]
         parent = path[:-1]
 
@@ -212,13 +216,34 @@ def add_column(
         # assign new IDs in order
         new_id = self.assign_new_column_id()
+        new_type = assign_fresh_schema_ids(field_type, self.assign_new_column_id)
+
+        if default_value is not None:
+            try:
+                # To make sure that the value is valid for the type
+                initial_default = literal(default_value).to(new_type).value
+            except ValueError as e:
+                raise ValueError(f"Invalid default value: {e}") from e
+        else:
+            initial_default = default_value  # type: ignore
+
+        if (required and initial_default is None) and not self._allow_incompatible_changes:
+            # Table format version 1 and 2 cannot add required column because there is no initial value
+            raise ValueError(f"Incompatible change: cannot add required column: {'.'.join(path)}")
 
         # update tracking for moves
         self._added_name_to_id[full_name] = new_id
         self._id_to_parent[new_id] = parent_full_path
 
-        new_type = assign_fresh_schema_ids(field_type, self.assign_new_column_id)
-        field = NestedField(field_id=new_id, name=name, field_type=new_type, required=required, doc=doc)
+        field = NestedField(
+            field_id=new_id,
+            name=name,
+            field_type=new_type,
+            required=required,
+            doc=doc,
+            initial_default=initial_default,
+            write_default=initial_default,
+        )
 
         if parent_id in self._adds:
             self._adds[parent_id].append(field)
@@ -250,6 +275,19 @@ def delete_column(self, path: Union[str, Tuple[str, ...]]) -> UpdateSchema:
 
         return self
 
+    def set_default_value(self, path: Union[str, Tuple[str, ...]], default_value: Optional[L]) -> UpdateSchema:
+        """Set the default value of a column.
+
+        Args:
+            path: The path to the column.
+            default_value: The new default value for the column.
+
+        Returns:
+            The UpdateSchema with the default-value update staged.
+        """
+        self._set_column_default_value(path, default_value)
+
+        return self
+
     def rename_column(self, path_from: Union[str, Tuple[str, ...]], new_name: str) -> UpdateSchema:
         """Update the name of a column.
 
@@ -273,6 +311,8 @@ def rename_column(self, path_from: Union[str, Tuple[str, ...]], new_name: str) -
                 field_type=updated.field_type,
                 doc=updated.doc,
                 required=updated.required,
+                initial_default=updated.initial_default,
+                write_default=updated.write_default,
             )
         else:
             self._updates[field_from.field_id] = NestedField(
@@ -281,6 +321,8 @@ def rename_column(self, path_from: Union[str, Tuple[str, ...]], new_name: str) -
                 field_type=field_from.field_type,
                 doc=field_from.doc,
                 required=field_from.required,
+                initial_default=field_from.initial_default,
+                write_default=field_from.write_default,
             )
 
         # Lookup the field because of casing
@@ -330,6 +372,8 @@ def _set_column_requirement(self, path: Union[str, Tuple[str, ...]], required: b
                 field_type=updated.field_type,
                 doc=updated.doc,
                 required=required,
+                initial_default=updated.initial_default,
+                write_default=updated.write_default,
             )
         else:
             self._updates[field.field_id] = NestedField(
@@ -338,6 +382,52 @@ def _set_column_requirement(self, path: Union[str, Tuple[str, ...]], required: b
                 field_type=field.field_type,
                 doc=field.doc,
                 required=required,
+                initial_default=field.initial_default,
+                write_default=field.write_default,
+            )
+
+    def _set_column_default_value(self, path: Union[str, Tuple[str, ...]], default_value: Any) -> None:
+        path = (path,) if isinstance(path, str) else path
+        name = ".".join(path)
+
+        field = self._schema.find_field(name, self._case_sensitive)
+
+        if default_value is not None:
+            try:
+                # To make sure that the value is valid for the type
+                default_value = literal(default_value).to(field.field_type).value
+            except ValueError as e:
+                raise ValueError(f"Invalid default value: {e}") from e
+
+        if field.required and default_value == field.write_default:
+            # if the change is a noop, allow it even if allowIncompatibleChanges is false
+            return
+
+        if not self._allow_incompatible_changes and field.required and default_value is None:
+            raise ValueError("Cannot change the default-value of a required column to None")
+
+        if field.field_id in self._deletes:
+            raise ValueError(f"Cannot update a column that will be deleted: {name}")
+
+        if updated := self._updates.get(field.field_id):
+            self._updates[field.field_id] = NestedField(
+                field_id=updated.field_id,
+                name=updated.name,
+                field_type=updated.field_type,
+                doc=updated.doc,
+                required=updated.required,
+                initial_default=updated.initial_default,
+                write_default=default_value,
+            )
+        else:
+            self._updates[field.field_id] = NestedField(
+                field_id=field.field_id,
+                name=field.name,
+                field_type=field.field_type,
+                doc=field.doc,
+                required=field.required,
+                initial_default=field.initial_default,
+                write_default=default_value,
             )
 
     def update_column(
@@ -387,6 +477,8 @@ def update_column(
                 field_type=field_type or updated.field_type,
                 doc=doc if doc is not None else updated.doc,
                 required=updated.required,
+                initial_default=updated.initial_default,
+                write_default=updated.write_default,
             )
         else:
             self._updates[field.field_id] = NestedField(
@@ -395,6 +487,8 @@ def update_column(
                 field_type=field_type or field.field_type,
                 doc=doc if doc is not None else field.doc,
                 required=field.required,
+                initial_default=field.initial_default,
+                write_default=field.write_default,
             )
 
         if required is not None:
@@ -636,19 +730,35 @@ def struct(self, struct: StructType, field_results: List[Optional[IcebergType]])
             name = field.name
             doc = field.doc
             required = field.required
+            write_default = field.write_default
 
             # There is an update
             if update := self._updates.get(field.field_id):
                 name = update.name
                 doc = update.doc
                 required = update.required
-
-            if field.name == name and field.field_type == result_type and field.required == required and field.doc == doc:
+                write_default = update.write_default
+
+            if (
+                field.name == name
+                and field.field_type == result_type
+                and field.required == required
+                and field.doc == doc
+                and field.write_default == write_default
+            ):
                 new_fields.append(field)
             else:
                 has_changes = True
                 new_fields.append(
-                    NestedField(field_id=field.field_id, name=name, field_type=result_type, required=required, doc=doc)
+                    NestedField(
+                        field_id=field.field_id,
+                        name=name,
+                        field_type=result_type,
+                        required=required,
+                        doc=doc,
+                        initial_default=field.initial_default,
+                        write_default=write_default,
+                    )
                 )
 
         if has_changes:
diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py
index dbc4a164fb..d9ace9d971 100644
--- a/pyiceberg/typedef.py
+++ b/pyiceberg/typedef.py
@@ -17,7 +17,7 @@
 from __future__ import annotations
 
 from abc import abstractmethod
-from datetime import date, datetime
+from datetime import date, datetime, time
 from decimal import Decimal
 from typing import (
     TYPE_CHECKING,
@@ -94,7 +94,7 @@ def __missing__(self, key: K) -> V:
     """A recursive dictionary type for nested structures in PyIceberg."""
 
 # Represents the literal value
-L = TypeVar("L", str, bool, int, float, bytes, UUID, Decimal, datetime, date, covariant=True)
+L = TypeVar("L", str, bool, int, float, bytes, UUID, Decimal, datetime, date, time, covariant=True)
 
 
 @runtime_checkable
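The `UpdateSchema` and `typedef` changes above are the user-facing surface of this PR. A usage sketch — the catalog name and table identifier are placeholders, and a format-version 3 table is assumed since earlier format versions cannot store default values:

```python
from pyiceberg.catalog import load_catalog
from pyiceberg.types import IntegerType

catalog = load_catalog("default")  # placeholder catalog config
table = catalog.load_table("db.events")  # placeholder table

with table.update_schema() as update:
    # With a default value, a required column no longer needs
    # allow_incompatible_changes: existing rows read back the initial-default.
    update.add_column("retries", IntegerType(), required=True, default_value=0)

with table.update_schema() as update:
    # Changes only the write-default for future writers;
    # the initial-default stays at 0.
    update.set_default_value("retries", 3)
```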
diff --git a/pyiceberg/types.py b/pyiceberg/types.py
index a33e56581a..5822597952 100644
--- a/pyiceberg/types.py
+++ b/pyiceberg/types.py
@@ -35,6 +35,7 @@
 import re
 from functools import cached_property
 from typing import (
+    Annotated,
     Any,
     ClassVar,
     Dict,
@@ -44,6 +45,7 @@
 )
 
 from pydantic import (
+    BeforeValidator,
     Field,
     PrivateAttr,
     SerializeAsAny,
@@ -51,7 +53,7 @@
     model_serializer,
     model_validator,
 )
-from pydantic_core.core_schema import ValidatorFunctionWrapHandler
+from pydantic_core.core_schema import ValidationInfo, ValidatorFunctionWrapHandler
 
 from pyiceberg.exceptions import ValidationError
 from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel, L, TableVersion
@@ -290,6 +292,18 @@ def __eq__(self, other: Any) -> bool:
         return self.root == other.root if isinstance(other, DecimalType) else False
 
 
+def _deserialize_default_value(v: Any, context: ValidationInfo) -> Any:
+    if v is not None:
+        from pyiceberg.conversions import from_json
+
+        return from_json(context.data.get("field_type"), v)
+    else:
+        return None
+
+
+DefaultValue = Annotated[L, BeforeValidator(_deserialize_default_value)]
+
+
 class NestedField(IcebergType):
     """Represents a field of a struct, a map key, a map value, or a list element.
@@ -326,8 +340,8 @@ class NestedField(IcebergType):
     field_type: SerializeAsAny[IcebergType] = Field(alias="type")
     required: bool = Field(default=False)
     doc: Optional[str] = Field(default=None, repr=False)
-    initial_default: Optional[Any] = Field(alias="initial-default", default=None, repr=False)
-    write_default: Optional[L] = Field(alias="write-default", default=None, repr=False)  # type: ignore
+    initial_default: Optional[DefaultValue] = Field(alias="initial-default", default=None, repr=False)  # type: ignore
+    write_default: Optional[DefaultValue] = Field(alias="write-default", default=None, repr=False)  # type: ignore
 
     @field_validator("field_type", mode="before")
     def convert_field_type(cls, v: Any) -> IcebergType:
@@ -361,6 +375,26 @@ def __init__(
         data["write-default"] = data["write-default"] if "write-default" in data else write_default
         super().__init__(**data)
 
+    @model_serializer()
+    def serialize_model(self) -> Dict[str, Any]:
+        from pyiceberg.conversions import to_json
+
+        fields = {
+            "id": self.field_id,
+            "name": self.name,
+            "type": self.field_type,
+            "required": self.required,
+        }
+
+        if self.doc is not None:
+            fields["doc"] = self.doc
+        if self.initial_default is not None:
+            fields["initial-default"] = to_json(self.field_type, self.initial_default)
+        if self.write_default is not None:
+            fields["write-default"] = to_json(self.field_type, self.write_default)
+
+        return fields
+
     def __str__(self) -> str:
         """Return the string representation of the NestedField class."""
         doc = "" if not self.doc else f" ({self.doc})"
diff --git a/pyiceberg/utils/schema_conversion.py b/pyiceberg/utils/schema_conversion.py
index 6959380d63..ec2fccd509 100644
--- a/pyiceberg/utils/schema_conversion.py
+++ b/pyiceberg/utils/schema_conversion.py
@@ -530,7 +530,7 @@ def field(self, field: NestedField, field_result: AvroType) -> AvroType:
         }
 
         if field.write_default is not None:
-            result["default"] = field.write_default  # type: ignore
+            result["default"] = field.write_default
         elif field.optional:
             result["default"] = None
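The `BeforeValidator`/`model_serializer` pair above keeps `initial-default` and `write-default` in the Iceberg JSON single-value representation on the wire while exposing Python-native values in memory. A quick round-trip sketch, assuming this patch:

```python
import json

from pyiceberg.types import IntegerType, NestedField

field = NestedField(
    field_id=2,
    name="data",
    field_type=IntegerType(),
    required=True,
    initial_default=22,
    write_default=22,
)

# serialize_model writes the defaults in their JSON single-value form...
payload = json.loads(field.model_dump_json())
assert payload["initial-default"] == 22
assert payload["write-default"] == 22

# ...and _deserialize_default_value turns them back into Python values,
# so the field survives a full serialization round trip.
assert NestedField.model_validate_json(field.model_dump_json()) == field
```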
diff --git a/tests/integration/test_rest_schema.py b/tests/integration/test_rest_schema.py
index fd975d81c9..4462da1c8c 100644
--- a/tests/integration/test_rest_schema.py
+++ b/tests/integration/test_rest_schema.py
@@ -15,6 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 # pylint:disable=redefined-outer-name
+from datetime import date, datetime, time, timezone
+from decimal import Decimal
+from typing import Any
+from uuid import UUID
 
 import pytest
 
@@ -27,6 +31,7 @@
 from pyiceberg.table.sorting import SortField, SortOrder
 from pyiceberg.table.update.schema import UpdateSchema
 from pyiceberg.transforms import IdentityTransform
+from pyiceberg.typedef import EMPTY_DICT, Properties
 from pyiceberg.types import (
     BinaryType,
     BooleanType,
@@ -69,7 +74,7 @@ def simple_table(catalog: Catalog, table_schema_simple: Schema) -> Table:
     return _create_table_with_schema(catalog, table_schema_simple)
 
 
-def _create_table_with_schema(catalog: Catalog, schema: Schema) -> Table:
+def _create_table_with_schema(catalog: Catalog, schema: Schema, properties: Properties = EMPTY_DICT) -> Table:
     tbl_name = "default.test_schema_evolution"
     try:
         catalog.drop_table(tbl_name)
@@ -78,7 +83,7 @@ def _create_table_with_schema(catalog: Catalog, schema: Schema) -> Table:
     return catalog.create_table(
         identifier=tbl_name,
         schema=schema,
-        properties={TableProperties.DEFAULT_NAME_MAPPING: create_mapping_from_schema(schema).model_dump_json()},
+        properties={TableProperties.DEFAULT_NAME_MAPPING: create_mapping_from_schema(schema).model_dump_json(), **properties},
     )
 
 
@@ -1074,9 +1079,8 @@ def test_add_required_column(catalog: Catalog) -> None:
     schema_ = Schema(NestedField(field_id=1, name="a", field_type=BooleanType(), required=False))
     table = _create_table_with_schema(catalog, schema_)
     update = table.update_schema()
-    with pytest.raises(ValueError) as exc_info:
+    with pytest.raises(ValueError, match="Incompatible change: cannot add required column: data"):
         update.add_column(path="data", field_type=IntegerType(), required=True)
-    assert "Incompatible change: cannot add required column: data" in str(exc_info.value)
 
     new_schema = (
         UpdateSchema(transaction=table.transaction(), allow_incompatible_changes=True)
@@ -1089,16 +1093,102 @@ def test_add_required_column(catalog: Catalog) -> None:
     )
 
 
+@pytest.mark.integration
+@pytest.mark.parametrize(
+    "iceberg_type, initial_default, write_default",
+    [
+        (BooleanType(), True, False),
+        (IntegerType(), 123, 456),
+        (LongType(), 123, 456),
+        (FloatType(), 19.25, 22.27),
+        (DoubleType(), 19.25, 22.27),
+        (DecimalType(10, 2), Decimal("19.25"), Decimal("22.27")),
+        (StringType(), "abc", "def"),
+        (DateType(), date(1990, 3, 1), date(1991, 3, 1)),
+        (TimeType(), time(19, 25, 22), time(22, 25, 22)),
+        (TimestampType(), datetime(1990, 5, 1, 22, 1, 1), datetime(2000, 5, 1, 22, 1, 1)),
+        (
+            TimestamptzType(),
+            datetime(1990, 5, 1, 22, 1, 1, tzinfo=timezone.utc),
+            datetime(2000, 5, 1, 22, 1, 1, tzinfo=timezone.utc),
+        ),
+        (BinaryType(), b"123", b"456"),
+        (FixedType(4), b"1234", b"5678"),
+        (UUIDType(), UUID(int=0x12345678123456781234567812345678), UUID(int=0x32145678123456781234567812345678)),
+    ],
+)
+def test_initial_default_all_columns(
+    catalog: Catalog, iceberg_type: PrimitiveType, initial_default: Any, write_default: Any
+) -> None:
+    # Round trips all the types through the rest catalog to check the serialization
+    table = _create_table_with_schema(catalog, Schema(), properties={TableProperties.FORMAT_VERSION: 3})
+
+    tx = table.update_schema()
+    tx.add_column(path="data", field_type=iceberg_type, required=True, default_value=initial_default)
+    tx.add_column(path="nested", field_type=StructType(), required=False)
+    tx.commit()
+
+    tx = table.update_schema()
+    tx.add_column(path=("nested", "data"), field_type=iceberg_type, required=True, default_value=initial_default)
+    tx.commit()
+
+    for field_id in [1, 3]:
+        field = table.schema().find_field(field_id)
+        assert field.initial_default == initial_default
+        assert field.write_default == initial_default
+
+    with table.update_schema() as tx:
+        tx.set_default_value("data", write_default)
+        tx.set_default_value(("nested", "data"), write_default)
+
+    for field_id in [1, 3]:
+        field = table.schema().find_field(field_id)
+        assert field.initial_default == initial_default
+        assert field.write_default == write_default
+
+
+@pytest.mark.integration
+def test_add_required_column_initial_default(catalog: Catalog) -> None:
+    schema_ = Schema(NestedField(field_id=1, name="a", field_type=BooleanType(), required=False))
+    table = _create_table_with_schema(catalog, schema_, properties={TableProperties.FORMAT_VERSION: 3})
+
+    table.update_schema().add_column(path="data", field_type=IntegerType(), required=True, default_value=22).commit()
+
+    assert table.schema() == Schema(
+        NestedField(field_id=1, name="a", field_type=BooleanType(), required=False),
+        NestedField(field_id=2, name="data", field_type=IntegerType(), required=True, initial_default=22, write_default=22),
+        schema_id=1,
+    )
+
+    # Update
+    table.update_schema().update_column(path="data", field_type=LongType()).rename_column("a", "bool").commit()
+
+    assert table.schema() == Schema(
+        NestedField(field_id=1, name="bool", field_type=BooleanType(), required=False),
+        NestedField(field_id=2, name="data", field_type=LongType(), required=True, initial_default=22, write_default=22),
+        schema_id=1,
+    )
+
+
+@pytest.mark.integration
+def test_add_required_column_initial_default_invalid_value(catalog: Catalog) -> None:
+    schema_ = Schema(NestedField(field_id=1, name="a", field_type=BooleanType(), required=False))
+    table = _create_table_with_schema(catalog, schema_)
+    update = table.update_schema()
+    with pytest.raises(ValueError, match="Invalid default value: Could not convert abc into a int"):
+        update.add_column(path="data", field_type=IntegerType(), required=True, default_value="abc")
+
+
 @pytest.mark.integration
 def test_add_required_column_case_insensitive(catalog: Catalog) -> None:
     schema_ = Schema(NestedField(field_id=1, name="id", field_type=BooleanType(), required=False))
     table = _create_table_with_schema(catalog, schema_)
 
-    with pytest.raises(ValueError) as exc_info:
+    with pytest.raises(ValueError, match="already exists: ID"):
         with table.transaction() as txn:
             with txn.update_schema(allow_incompatible_changes=True) as update:
                 update.case_sensitive(False).add_column(path="ID", field_type=IntegerType(), required=True)
-    assert "already exists: ID" in str(exc_info.value)
 
     new_schema = (
         UpdateSchema(transaction=table.transaction(), allow_incompatible_changes=True)
diff --git a/tests/test_types.py b/tests/test_types.py
index e14ec9dd6c..586022e5a4 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -240,11 +240,9 @@ def test_nested_field() -> None:
     assert str(field_var) == str(eval(repr(field_var)))
     assert field_var == pickle.loads(pickle.dumps(field_var))
 
-    with pytest.raises(pydantic_core.ValidationError) as exc_info:
+    with pytest.raises(pydantic_core.ValidationError, match=".*validation errors for NestedField.*"):
         _ = (NestedField(1, "field", StringType(), required=True, write_default=(1, "a", True)),)  # type: ignore
 
-    assert "validation errors for NestedField" in str(exc_info.value)
-
 
 def test_nested_field_complex_type_as_str_unsupported() -> None:
     unsupported_types = ["list", "map", "struct"]
diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py
index 8d793c468c..5740587958 100644
--- a/tests/utils/test_manifest.py
+++ b/tests/utils/test_manifest.py
@@ -79,7 +79,7 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:
 
     data_file = manifest_entry.data_file
 
-    assert data_file.content is DataFileContent.DATA
+    assert data_file.content == DataFileContent.DATA
     assert (
         data_file.file_path
         == "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"
@@ -416,7 +416,7 @@ def test_write_manifest(
 
     data_file = manifest_entry.data_file
 
-    assert data_file.content is DataFileContent.DATA
+    assert data_file.content == DataFileContent.DATA
     assert (
         data_file.file_path
         == "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet"