16 changes: 16 additions & 0 deletions pyiceberg/expressions/__init__.py
@@ -383,6 +383,10 @@ def __repr__(self) -> str:
     @abstractmethod
     def as_bound(self) -> Type[BoundUnaryPredicate[Any]]: ...
 
+    def __hash__(self) -> int:
+        """Return hash value of the UnaryPredicate class."""
+        return hash(str(self))
+
 
 class BoundUnaryPredicate(BoundPredicate[L], ABC):
     def __repr__(self) -> str:
@@ -412,6 +416,10 @@ def __invert__(self) -> BoundNotNull[L]:
     def as_unbound(self) -> Type[IsNull]:
         return IsNull
 
+    def __hash__(self) -> int:
+        """Return hash value of the BoundIsNull class."""
+        return hash(str(self))
+
 
 class BoundNotNull(BoundUnaryPredicate[L]):
     def __new__(cls, term: BoundTerm[L]):  # type: ignore  # pylint: disable=W0221
@@ -698,6 +706,10 @@ def __repr__(self) -> str:
     @abstractmethod
     def as_bound(self) -> Type[BoundLiteralPredicate[L]]: ...
 
+    def __hash__(self) -> int:
+        """Return hash value of the LiteralPredicate class."""
+        return hash(str(self))
+
 
 class BoundLiteralPredicate(BoundPredicate[L], ABC):
     literal: Literal[L]
@@ -731,6 +743,10 @@ def __invert__(self) -> BoundNotEqualTo[L]:
     def as_unbound(self) -> Type[EqualTo[L]]:
         return EqualTo
 
+    def __hash__(self) -> int:
+        """Return hash value of the BoundEqualTo class."""
+        return hash(str(self))
+
 
 class BoundNotEqualTo(BoundLiteralPredicate[L]):
     def __invert__(self) -> BoundEqualTo[L]:
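Note on the __hash__ additions: defining __eq__ on a Python class sets __hash__ to None, so value-comparable predicates are unhashable until an explicit __hash__ is restored. Hashing the string form keeps the hash consistent with equality, since structurally equal predicates render the same repr. A minimal sketch of what this enables, with hypothetical column names, using the public pyiceberg.expressions API:

    # Hashable predicates can be deduplicated in a set or used as dict keys.
    # IsNull and EqualTo inherit the new __hash__ from UnaryPredicate and
    # LiteralPredicate respectively.
    from pyiceberg.expressions import EqualTo, IsNull

    predicates = {IsNull("city"), EqualTo("id", 5), EqualTo("id", 5)}
    assert len(predicates) == 2  # the duplicate EqualTo collapses to one entry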
5 changes: 4 additions & 1 deletion pyiceberg/io/pyarrow.py
@@ -1725,6 +1725,7 @@ def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteT

     file_path = f'{table_metadata.location}/data/{task.generate_data_file_path("parquet")}'  # generate_data_file_filename
     schema = table_metadata.schema()
+
     arrow_file_schema = schema_to_pyarrow(schema)
 
     fo = io.new_output(file_path)
@@ -1735,7 +1736,9 @@ def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteT
     )
     with fo.create(overwrite=True) as fos:
         with pq.ParquetWriter(fos, schema=arrow_file_schema, **parquet_writer_kwargs) as writer:
-            writer.write_table(task.df, row_group_size=row_group_size)
+            # Align the columns in case the input Arrow table's column order differs from the Iceberg table schema.
+            df_to_write = task.df.select(arrow_file_schema.names)
+            writer.write_table(df_to_write, row_group_size=row_group_size)
 
     data_file = DataFile(
         content=DataFileContent.DATA,
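Note on the column-alignment fix: pyarrow.Table.select returns the named columns in the requested order, and ParquetWriter.write_table rejects a table whose schema does not match the writer's schema, so reordering task.df to the file schema's column order avoids that failure when callers pass columns in a different order. A minimal standalone sketch with toy data, not the pyiceberg write path itself:

    import pyarrow as pa

    # Columns arrive as (b, a) but the target file schema expects (a, b).
    df = pa.table({"b": [1, 2], "a": ["x", "y"]})
    aligned = df.select(["a", "b"])  # select() also reorders columns
    assert aligned.column_names == ["a", "b"]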