Skip to content

Commit cec051f

Browse files
jaychiaJay Chia
and
Jay Chia
authored
Add tests and fixes for Daft integration (#381)
* Implement to_daft on Table instead of Scan * Add integration tests --------- Co-authored-by: Jay Chia <[email protected]@users.noreply.github.com>
1 parent a7794ca commit cec051f

File tree

2 files changed

+32
-10
lines changed

2 files changed

+32
-10
lines changed

pyiceberg/table/__init__.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,16 @@ def __repr__(self) -> str:
10251025
result_str = f"{table_name}(\n {schema_str}\n),\n{partition_str},\n{sort_order_str},\n{snapshot_str}"
10261026
return result_str
10271027

1028+
def to_daft(self) -> daft.DataFrame:
1029+
"""Read a Daft DataFrame lazily from this Iceberg table.
1030+
1031+
Returns:
1032+
daft.DataFrame: Unmaterialized Daft Dataframe created from the Iceberg table
1033+
"""
1034+
import daft
1035+
1036+
return daft.read_iceberg(self)
1037+
10281038

10291039
class StaticTable(Table):
10301040
"""Load a table directly from a metadata file (i.e., without using a catalog)."""
@@ -1382,16 +1392,6 @@ def to_ray(self) -> ray.data.dataset.Dataset:
13821392

13831393
return ray.data.from_arrow(self.to_arrow())
13841394

1385-
def to_daft(self) -> daft.DataFrame:
1386-
"""Read a Daft DataFrame lazily from this Iceberg table.
1387-
1388-
Returns:
1389-
daft.DataFrame: Unmaterialized Daft Dataframe created from the Iceberg table
1390-
"""
1391-
import daft
1392-
1393-
return daft.read_iceberg(self)
1394-
13951395

13961396
class MoveOperation(Enum):
13971397
First = 1

tests/integration/test_reads.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,28 @@ def test_pyarrow_limit(catalog: Catalog) -> None:
178178
assert len(full_result) == 10
179179

180180

181+
@pytest.mark.integration
182+
@pytest.mark.filterwarnings("ignore")
183+
@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')])
184+
def test_daft_nan(catalog: Catalog) -> None:
185+
table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten")
186+
df = table_test_null_nan_rewritten.to_daft()
187+
assert df.count_rows() == 3
188+
assert math.isnan(df.to_pydict()["col_numeric"][0])
189+
190+
191+
@pytest.mark.integration
192+
@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')])
193+
def test_daft_nan_rewritten(catalog: Catalog) -> None:
194+
table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten")
195+
df = table_test_null_nan_rewritten.to_daft()
196+
df = df.where(df["col_numeric"].float.is_nan())
197+
df = df.select("idx", "col_numeric")
198+
assert df.count_rows() == 1
199+
assert df.to_pydict()["idx"][0] == 1
200+
assert math.isnan(df.to_pydict()["col_numeric"][0])
201+
202+
181203
@pytest.mark.integration
182204
@pytest.mark.filterwarnings("ignore")
183205
@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('catalog_hive'), pytest.lazy_fixture('catalog_rest')])

0 commit comments

Comments
 (0)