diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 3b8138b61a..defe5958c5 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -18,6 +18,7 @@ import math from abc import ABC, abstractmethod +from copy import copy from enum import Enum from types import TracebackType from typing import ( @@ -909,7 +910,7 @@ def __init__(self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id self._sequence_number = sequence_number def prepare_manifest(self, manifest_file: ManifestFile) -> ManifestFile: - wrapped_manifest_file = ManifestFile(*manifest_file.record_fields()) + wrapped_manifest_file = copy(manifest_file) if wrapped_manifest_file.sequence_number == UNASSIGNED_SEQ: # if the sequence number is being assigned here, then the manifest must be created by the current operation. diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index a3cf255341..f4e53a59a5 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -229,9 +229,9 @@ def partition_to_path(self, data: Record, schema: Schema) -> str: field_strs = [] value_strs = [] - for pos, value in enumerate(data.record_fields()): + for pos in range(len(self.fields)): partition_field = self.fields[pos] - value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=value) + value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=data[pos]) value_str = quote(value_str, safe='') value_strs.append(value_str) diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index f74ac4b7d4..79eb8b0b8a 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -274,14 +274,14 @@ def set_partition_summary_limit(self, limit: int) -> None: def add_file(self, data_file: DataFile, schema: Schema, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC) -> None: self.metrics.add_file(data_file) - if len(data_file.partition.record_fields()) != 0: + if len(data_file.partition) > 0: self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=True, schema=schema) def remove_file( self, data_file: DataFile, schema: Schema, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC ) -> None: self.metrics.remove_file(data_file) - if len(data_file.partition.record_fields()) != 0: + if len(data_file.partition) > 0: self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=False, schema=schema) def update_partition_metrics(self, partition_spec: PartitionSpec, file: DataFile, is_add_file: bool, schema: Schema) -> None: diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py index 6ccf9526ba..26f4d4d5ac 100644 --- a/pyiceberg/typedef.py +++ b/pyiceberg/typedef.py @@ -25,7 +25,6 @@ Callable, Dict, Generic, - List, Literal, Optional, Protocol, @@ -198,9 +197,9 @@ def __repr__(self) -> str: """Return the string representation of the Record class.""" return f"{self.__class__.__name__}[{', '.join(f'{key}={repr(value)}' for key, value in self.__dict__.items() if not key.startswith('_'))}]" - def record_fields(self) -> List[str]: - """Return values of all the fields of the Record class except those specified in skip_fields.""" - return [self.__getattribute__(v) if hasattr(self, v) else None for v in self._position_to_field_name] + def __len__(self) -> int: + """Return the number of fields in the Record class.""" + return len(self._position_to_field_name) def __hash__(self) -> int: """Return hash value of the Record class.""" diff --git a/tests/integration/test_rest_manifest.py b/tests/integration/test_rest_manifest.py index 8191209ae6..0e768c6e68 100644 --- a/tests/integration/test_rest_manifest.py +++ b/tests/integration/test_rest_manifest.py @@ -17,6 +17,7 @@ # pylint:disable=redefined-outer-name import inspect +from copy import copy from enum import Enum from tempfile import TemporaryDirectory from typing import Any @@ -26,7 +27,7 @@ from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.io.pyarrow import PyArrowFileIO -from pyiceberg.manifest import DataFile, ManifestEntry, write_manifest +from pyiceberg.manifest import DataFile, write_manifest from pyiceberg.table import Table from pyiceberg.utils.lazydict import LazyDict @@ -99,7 +100,7 @@ def test_write_sample_manifest(table_test_all_types: Table) -> None: sort_order_id=entry.data_file.sort_order_id, spec_id=entry.data_file.spec_id, ) - wrapped_entry_v2 = ManifestEntry(*entry.record_fields()) + wrapped_entry_v2 = copy(entry) wrapped_entry_v2.data_file = wrapped_data_file_v2_debug wrapped_entry_v2_dict = todict(wrapped_entry_v2) # This one should not be written