diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index a7dd5d071f..490777e57c 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -1523,6 +1523,7 @@ message schema { deletes: vec![], sequence_number: 0, equality_ids: vec![], + file_size_in_bytes: 0, })] .into_iter(), )) as FileScanTaskStream; diff --git a/crates/iceberg/src/scan/context.rs b/crates/iceberg/src/scan/context.rs index ea6850849b..ee7187550c 100644 --- a/crates/iceberg/src/scan/context.rs +++ b/crates/iceberg/src/scan/context.rs @@ -143,6 +143,7 @@ impl ManifestEntryContext { deletes, sequence_number: self.manifest_entry.sequence_number().unwrap_or(0), equality_ids: self.manifest_entry.data_file().equality_ids().to_vec(), + file_size_in_bytes: self.manifest_entry.data_file().file_size_in_bytes(), }) } } diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index 6550094ae0..a8df739b1e 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -1795,6 +1795,7 @@ pub mod tests { deletes: vec![], sequence_number: 0, equality_ids: vec![], + file_size_in_bytes: 0, }; test_fn(task); @@ -1812,6 +1813,7 @@ pub mod tests { deletes: vec![], sequence_number: 0, equality_ids: vec![], + file_size_in_bytes: 0, }; test_fn(task); } diff --git a/crates/iceberg/src/scan/task.rs b/crates/iceberg/src/scan/task.rs index 210eb1c1bc..9ff5f6353c 100644 --- a/crates/iceberg/src/scan/task.rs +++ b/crates/iceberg/src/scan/task.rs @@ -63,6 +63,9 @@ pub struct FileScanTask { pub sequence_number: i64, /// equality ids pub equality_ids: Vec<i32>, + + /// The size of the file in bytes. 
+ pub file_size_in_bytes: u64, } impl FileScanTask { @@ -127,6 +130,7 @@ impl From<&DeleteFileContext> for FileScanTask { deletes: vec![], sequence_number: ctx.manifest_entry.sequence_number().unwrap_or(0), equality_ids: ctx.manifest_entry.data_file().equality_ids().to_vec(), + file_size_in_bytes: ctx.manifest_entry.data_file().file_size_in_bytes(), } } } diff --git a/crates/iceberg/src/writer/file_writer/location_generator.rs b/crates/iceberg/src/writer/file_writer/location_generator.rs index 3f5d4ee180..29f674cfd9 100644 --- a/crates/iceberg/src/writer/file_writer/location_generator.rs +++ b/crates/iceberg/src/writer/file_writer/location_generator.rs @@ -39,7 +39,8 @@ const DEFAULT_DATA_DIR: &str = "/data"; /// `DefaultLocationGenerator` used to generate the data dir location of data file. /// The location is generated based on the table location and the data location in table properties. pub struct DefaultLocationGenerator { - dir_path: String, + /// The data dir path of the table. + pub dir_path: String, } impl DefaultLocationGenerator { diff --git a/crates/integrations/datafusion/src/lib.rs b/crates/integrations/datafusion/src/lib.rs index b7b927fdde..b7355296a8 100644 --- a/crates/integrations/datafusion/src/lib.rs +++ b/crates/integrations/datafusion/src/lib.rs @@ -21,7 +21,7 @@ pub use catalog::*; mod error; pub use error::*; -mod physical_plan; +pub mod physical_plan; mod schema; mod table; pub use table::table_provider_factory::IcebergTableProviderFactory; diff --git a/crates/integrations/datafusion/src/physical_plan/mod.rs b/crates/integrations/datafusion/src/physical_plan/mod.rs index 2fab109d72..d5495b5231 100644 --- a/crates/integrations/datafusion/src/physical_plan/mod.rs +++ b/crates/integrations/datafusion/src/physical_plan/mod.rs @@ -15,5 +15,5 @@ // specific language governing permissions and limitations // under the License. -pub(crate) mod expr_to_predicate; +pub mod expr_to_predicate; pub(crate) mod scan;