Skip to content

Remove record_fields from the Record class #580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyiceberg/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import math
from abc import ABC, abstractmethod
from copy import copy
from enum import Enum
from types import TracebackType
from typing import (
Expand Down Expand Up @@ -909,7 +910,7 @@ def __init__(self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id
self._sequence_number = sequence_number

def prepare_manifest(self, manifest_file: ManifestFile) -> ManifestFile:
wrapped_manifest_file = ManifestFile(*manifest_file.record_fields())
wrapped_manifest_file = copy(manifest_file)

if wrapped_manifest_file.sequence_number == UNASSIGNED_SEQ:
# if the sequence number is being assigned here, then the manifest must be created by the current operation.
Expand Down
4 changes: 2 additions & 2 deletions pyiceberg/partitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,9 @@ def partition_to_path(self, data: Record, schema: Schema) -> str:

field_strs = []
value_strs = []
for pos, value in enumerate(data.record_fields()):
for pos in range(len(self.fields)):
partition_field = self.fields[pos]
value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=value)
value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=data[pos])

value_str = quote(value_str, safe='')
value_strs.append(value_str)
Expand Down
4 changes: 2 additions & 2 deletions pyiceberg/table/snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,14 +274,14 @@ def set_partition_summary_limit(self, limit: int) -> None:

def add_file(self, data_file: DataFile, schema: Schema, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC) -> None:
self.metrics.add_file(data_file)
if len(data_file.partition.record_fields()) != 0:
if len(data_file.partition) > 0:
self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=True, schema=schema)

def remove_file(
self, data_file: DataFile, schema: Schema, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC
) -> None:
self.metrics.remove_file(data_file)
if len(data_file.partition.record_fields()) != 0:
if len(data_file.partition) > 0:
self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=False, schema=schema)

def update_partition_metrics(self, partition_spec: PartitionSpec, file: DataFile, is_add_file: bool, schema: Schema) -> None:
Expand Down
7 changes: 3 additions & 4 deletions pyiceberg/typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
Callable,
Dict,
Generic,
List,
Literal,
Optional,
Protocol,
Expand Down Expand Up @@ -198,9 +197,9 @@ def __repr__(self) -> str:
"""Return the string representation of the Record class."""
return f"{self.__class__.__name__}[{', '.join(f'{key}={repr(value)}' for key, value in self.__dict__.items() if not key.startswith('_'))}]"

def record_fields(self) -> List[str]:
"""Return values of all the fields of the Record class except those specified in skip_fields."""
return [self.__getattribute__(v) if hasattr(self, v) else None for v in self._position_to_field_name]
def __len__(self) -> int:
"""Return the number of fields in the Record class."""
return len(self._position_to_field_name)

def __hash__(self) -> int:
"""Return hash value of the Record class."""
Expand Down
5 changes: 3 additions & 2 deletions tests/integration/test_rest_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# pylint:disable=redefined-outer-name

import inspect
from copy import copy
from enum import Enum
from tempfile import TemporaryDirectory
from typing import Any
Expand All @@ -26,7 +27,7 @@

from pyiceberg.catalog import Catalog, load_catalog
from pyiceberg.io.pyarrow import PyArrowFileIO
from pyiceberg.manifest import DataFile, ManifestEntry, write_manifest
from pyiceberg.manifest import DataFile, write_manifest
from pyiceberg.table import Table
from pyiceberg.utils.lazydict import LazyDict

Expand Down Expand Up @@ -99,7 +100,7 @@ def test_write_sample_manifest(table_test_all_types: Table) -> None:
sort_order_id=entry.data_file.sort_order_id,
spec_id=entry.data_file.spec_id,
)
wrapped_entry_v2 = ManifestEntry(*entry.record_fields())
wrapped_entry_v2 = copy(entry)
wrapped_entry_v2.data_file = wrapped_data_file_v2_debug
wrapped_entry_v2_dict = todict(wrapped_entry_v2)
# This one should not be written
Expand Down