diff --git a/Cargo.lock b/Cargo.lock index 0a2987a89b..dfd8ea8fbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3547,6 +3547,7 @@ dependencies = [ "typed-builder 0.20.1", "url", "uuid", + "waker-set", "zstd", ] @@ -7514,6 +7515,16 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" +[[package]] +name = "waker-set" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e958152c46345e1af5c61812030ac85200573a0b384c137e83ce2c01ac4bc07" +dependencies = [ + "crossbeam-utils", + "slab", +] + [[package]] name = "walkdir" version = "2.5.0" diff --git a/crates/iceberg/src/arrow/caching_delete_file_loader.rs b/crates/iceberg/src/arrow/caching_delete_file_loader.rs new file mode 100644 index 0000000000..a48ebe5dfd --- /dev/null +++ b/crates/iceberg/src/arrow/caching_delete_file_loader.rs @@ -0,0 +1,324 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::HashMap; + +use futures::{StreamExt, TryStreamExt}; +use tokio::sync::oneshot::{Receiver, channel}; + +use super::delete_filter::DeleteFilter; +use crate::arrow::delete_file_loader::BasicDeleteFileLoader; +use crate::delete_vector::DeleteVector; +use crate::expr::Predicate; +use crate::io::FileIO; +use crate::scan::{ArrowRecordBatchStream, FileScanTaskDeleteFile}; +use crate::spec::{DataContentType, SchemaRef}; +use crate::{Error, ErrorKind, Result}; + +#[derive(Clone, Debug)] +pub(crate) struct CachingDeleteFileLoader { + basic_delete_file_loader: BasicDeleteFileLoader, + concurrency_limit_data_files: usize, +} + +// Intermediate context during processing of a delete file task. +enum DeleteFileContext { + // TODO: Delete Vector loader from Puffin files + ExistingEqDel, + PosDels(ArrowRecordBatchStream), + FreshEqDel { + batch_stream: ArrowRecordBatchStream, + sender: tokio::sync::oneshot::Sender, + }, +} + +// Final result of the processing of a delete file task before +// results are fully merged into the DeleteFileManager's state +enum ParsedDeleteFileContext { + DelVecs(HashMap), + EqDel, +} + +#[allow(unused_variables)] +impl CachingDeleteFileLoader { + pub(crate) fn new(file_io: FileIO, concurrency_limit_data_files: usize) -> Self { + CachingDeleteFileLoader { + basic_delete_file_loader: BasicDeleteFileLoader::new(file_io), + concurrency_limit_data_files, + } + } + + /// Initiates loading of all deletes for all the specified tasks + /// + /// Returned future completes once all positional deletes and delete vectors + /// have loaded. EQ deletes are not waited for in this method but the returned + /// DeleteFilter will await their loading when queried for them. 
+ /// + /// * Create a single stream of all delete file tasks irrespective of type, + /// so that we can respect the combined concurrency limit + /// * We then process each in two phases: load and parse. + /// * for positional deletes the load phase instantiates an ArrowRecordBatchStream to + /// stream the file contents out + /// * for eq deletes, we first check if the EQ delete is already loaded or being loaded by + /// another concurrently processing data file scan task. If it is, we skip it. + /// If not, the DeleteFilter is updated to contain a notifier to prevent other data file + /// tasks from starting to load the same equality delete file. We spawn a task to load + /// the EQ delete's record batch stream, convert it to a predicate, update the delete filter, + /// and notify any task that was waiting for it. + /// * When this gets updated to add support for delete vectors, the load phase will return + /// a PuffinReader for them. + /// * The parse phase parses each record batch stream according to its associated data type. + /// The result of this is a map of data file paths to delete vectors for the positional + /// delete tasks (and in future for the delete vector tasks). For equality delete + /// file tasks, this results in an unbound Predicate. + /// * The unbound Predicates resulting from equality deletes are sent to their associated oneshot + /// channel to store them in the right place in the delete file managers state. + /// * The results of all of these futures are awaited on in parallel with the specified + /// level of concurrency and collected into a vec. We then combine all the delete + /// vector maps that resulted from any positional delete or delete vector files into a + /// single map and persist it in the state. + /// + /// + /// Conceptually, the data flow is like this: + /// ```none + /// FileScanTaskDeleteFile + /// | + /// Skip Started EQ Deletes + /// | + /// | + /// [load recordbatch stream / puffin] + /// DeleteFileContext + /// | + /// | + /// +-----------------------------+--------------------------+ + /// Pos Del Del Vec (Not yet Implemented) EQ Del + /// | | | + /// [parse pos del stream] [parse del vec puffin] [parse eq del] + /// HashMap HashMap (Predicate, Sender) + /// | | | + /// | | [persist to state] + /// | | () + /// | | | + /// +-----------------------------+--------------------------+ + /// | + /// [buffer unordered] + /// | + /// [combine del vectors] + /// HashMap + /// | + /// [persist del vectors to state] + /// () + /// | + /// | + /// [join!] 
+ /// ``` + pub(crate) fn load_deletes( + &self, + delete_file_entries: &[FileScanTaskDeleteFile], + schema: SchemaRef, + ) -> Receiver> { + let (tx, rx) = channel(); + let del_filter = DeleteFilter::default(); + + let stream_items = delete_file_entries + .iter() + .map(|t| { + ( + t.clone(), + self.basic_delete_file_loader.clone(), + del_filter.clone(), + schema.clone(), + ) + }) + .collect::>(); + let task_stream = futures::stream::iter(stream_items); + + let del_filter = del_filter.clone(); + let concurrency_limit_data_files = self.concurrency_limit_data_files; + let basic_delete_file_loader = self.basic_delete_file_loader.clone(); + crate::runtime::spawn(async move { + let result = async move { + let mut del_filter = del_filter; + let basic_delete_file_loader = basic_delete_file_loader.clone(); + + let results: Vec = task_stream + .map(move |(task, file_io, del_filter, schema)| { + let basic_delete_file_loader = basic_delete_file_loader.clone(); + async move { + Self::load_file_for_task( + &task, + basic_delete_file_loader.clone(), + del_filter, + schema, + ) + .await + } + }) + .map(move |ctx| { + Ok(async { Self::parse_file_content_for_task(ctx.await?).await }) + }) + .try_buffer_unordered(concurrency_limit_data_files) + .try_collect::>() + .await?; + + for item in results { + if let ParsedDeleteFileContext::DelVecs(hash_map) = item { + for (data_file_path, delete_vector) in hash_map.into_iter() { + del_filter.upsert_delete_vector(data_file_path, delete_vector); + } + } + } + + Ok(del_filter) + } + .await; + + let _ = tx.send(result); + }); + + rx + } + + async fn load_file_for_task( + task: &FileScanTaskDeleteFile, + basic_delete_file_loader: BasicDeleteFileLoader, + del_filter: DeleteFilter, + schema: SchemaRef, + ) -> Result { + match task.file_type { + DataContentType::PositionDeletes => Ok(DeleteFileContext::PosDels( + basic_delete_file_loader + .parquet_to_batch_stream(&task.file_path) + .await?, + )), + + DataContentType::EqualityDeletes => { + let Some(notify) = del_filter.try_start_eq_del_load(&task.file_path) else { + return Ok(DeleteFileContext::ExistingEqDel); + }; + + let (sender, receiver) = channel(); + del_filter.insert_equality_delete(&task.file_path, receiver); + + Ok(DeleteFileContext::FreshEqDel { + batch_stream: BasicDeleteFileLoader::evolve_schema( + basic_delete_file_loader + .parquet_to_batch_stream(&task.file_path) + .await?, + schema, + ) + .await?, + sender, + }) + } + + DataContentType::Data => Err(Error::new( + ErrorKind::Unexpected, + "tasks with files of type Data not expected here", + )), + } + } + + async fn parse_file_content_for_task( + ctx: DeleteFileContext, + ) -> Result { + match ctx { + DeleteFileContext::ExistingEqDel => Ok(ParsedDeleteFileContext::EqDel), + DeleteFileContext::PosDels(batch_stream) => { + let del_vecs = + Self::parse_positional_deletes_record_batch_stream(batch_stream).await?; + Ok(ParsedDeleteFileContext::DelVecs(del_vecs)) + } + DeleteFileContext::FreshEqDel { + sender, + batch_stream, + } => { + let predicate = + Self::parse_equality_deletes_record_batch_stream(batch_stream).await?; + + sender + .send(predicate) + .map_err(|err| { + Error::new( + ErrorKind::Unexpected, + "Could not send eq delete predicate to state", + ) + }) + .map(|_| ParsedDeleteFileContext::EqDel) + } + } + } + + /// Parses a record batch stream coming from positional delete files + /// + /// Returns a map of data file path to a delete vector + async fn parse_positional_deletes_record_batch_stream( + stream: ArrowRecordBatchStream, + ) -> Result> { + // 
TODO + + Err(Error::new( + ErrorKind::FeatureUnsupported, + "parsing of positional deletes is not yet supported", + )) + } + + /// Parses record batch streams from individual equality delete files + /// + /// Returns an unbound Predicate for each batch stream + async fn parse_equality_deletes_record_batch_stream( + streams: ArrowRecordBatchStream, + ) -> Result { + // TODO + + Err(Error::new( + ErrorKind::FeatureUnsupported, + "parsing of equality deletes is not yet supported", + )) + } +} + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use super::*; + use crate::arrow::delete_file_loader::tests::setup; + + #[tokio::test] + async fn test_delete_file_manager_load_deletes() { + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path(); + let file_io = FileIO::from_path(table_location.as_os_str().to_str().unwrap()) + .unwrap() + .build() + .unwrap(); + + // Note that with the delete file parsing not yet in place, all we can test here is that + // the call to the loader fails with the expected FeatureUnsupportedError. + let delete_file_manager = CachingDeleteFileLoader::new(file_io.clone(), 10); + + let file_scan_tasks = setup(table_location); + + let result = delete_file_manager + .load_deletes(&file_scan_tasks[0].deletes, file_scan_tasks[0].schema_ref()) + .await + .unwrap(); + + assert!(result.is_err_and(|e| e.kind() == ErrorKind::FeatureUnsupported)); + } +} diff --git a/crates/iceberg/src/arrow/delete_file_loader.rs b/crates/iceberg/src/arrow/delete_file_loader.rs new file mode 100644 index 0000000000..802ea794c4 --- /dev/null +++ b/crates/iceberg/src/arrow/delete_file_loader.rs @@ -0,0 +1,273 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use futures::{StreamExt, TryStreamExt}; + +use crate::arrow::ArrowReader; +use crate::arrow::record_batch_transformer::RecordBatchTransformer; +use crate::io::FileIO; +use crate::scan::{ArrowRecordBatchStream, FileScanTaskDeleteFile}; +use crate::spec::{Schema, SchemaRef}; +use crate::{Error, ErrorKind, Result}; + +/// Delete File Loader +#[allow(unused)] +#[async_trait::async_trait] +pub trait DeleteFileLoader { + /// Read the delete file referred to in the task + /// + /// Returns the contents of the delete file as a RecordBatch stream. Applies schema evolution. + async fn read_delete_file( + &self, + task: &FileScanTaskDeleteFile, + schema: SchemaRef, + ) -> Result; +} + +#[derive(Clone, Debug)] +pub(crate) struct BasicDeleteFileLoader { + file_io: FileIO, +} + +#[allow(unused_variables)] +impl BasicDeleteFileLoader { + pub fn new(file_io: FileIO) -> Self { + BasicDeleteFileLoader { file_io } + } + /// Loads a RecordBatchStream for a given datafile. 
+ pub(crate) async fn parquet_to_batch_stream( + &self, + data_file_path: &str, + ) -> Result { + /* + Essentially a super-cut-down ArrowReader. We can't use ArrowReader directly + as that introduces a circular dependency. + */ + let record_batch_stream = ArrowReader::create_parquet_record_batch_stream_builder( + data_file_path, + self.file_io.clone(), + false, + ) + .await? + .build()? + .map_err(|e| Error::new(ErrorKind::Unexpected, format!("{}", e))); + + Ok(Box::pin(record_batch_stream) as ArrowRecordBatchStream) + } + + /// Evolves the schema of the RecordBatches from an equality delete file + pub(crate) async fn evolve_schema( + record_batch_stream: ArrowRecordBatchStream, + target_schema: Arc, + ) -> Result { + let eq_ids = target_schema + .as_ref() + .field_id_to_name_map() + .keys() + .cloned() + .collect::>(); + + let mut record_batch_transformer = + RecordBatchTransformer::build(target_schema.clone(), &eq_ids); + + let record_batch_stream = record_batch_stream.map(move |record_batch| { + record_batch.and_then(|record_batch| { + record_batch_transformer.process_record_batch(record_batch) + }) + }); + + Ok(Box::pin(record_batch_stream) as ArrowRecordBatchStream) + } +} + +#[async_trait::async_trait] +impl DeleteFileLoader for BasicDeleteFileLoader { + async fn read_delete_file( + &self, + task: &FileScanTaskDeleteFile, + schema: SchemaRef, + ) -> Result { + let raw_batch_stream = self.parquet_to_batch_stream(&task.file_path).await?; + + Self::evolve_schema(raw_batch_stream, schema).await + } +} + +#[cfg(test)] +pub(crate) mod tests { + use std::collections::HashMap; + use std::fs::File; + use std::path::Path; + use std::sync::Arc; + + use arrow_array::{Int64Array, RecordBatch, StringArray}; + use arrow_schema::Schema as ArrowSchema; + use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY}; + use parquet::basic::Compression; + use parquet::file::properties::WriterProperties; + use tempfile::TempDir; + + use super::*; + use crate::scan::FileScanTask; + use crate::spec::{DataContentType, DataFileFormat, Schema}; + + type ArrowSchemaRef = Arc; + + const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546; + const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545; + + #[tokio::test] + async fn test_basic_delete_file_loader_read_delete_file() { + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path(); + let file_io = FileIO::from_path(table_location.as_os_str().to_str().unwrap()) + .unwrap() + .build() + .unwrap(); + + // Note that with the delete file parsing not yet in place, all we can test here is that + // the call to the loader fails with the expected FeatureUnsupportedError. 
+ let delete_file_loader = BasicDeleteFileLoader::new(file_io.clone()); + + let file_scan_tasks = setup(table_location); + + let result = delete_file_loader + .read_delete_file( + &file_scan_tasks[0].deletes[0], + file_scan_tasks[0].schema_ref(), + ) + .await + .unwrap(); + + let result = result.try_collect::>().await.unwrap(); + + assert_eq!(result.len(), 1); + } + + pub(crate) fn setup(table_location: &Path) -> Vec { + let data_file_schema = Arc::new(Schema::builder().build().unwrap()); + let positional_delete_schema = create_pos_del_schema(); + + let file_path_values = vec![format!("{}/1.parquet", table_location.to_str().unwrap()); 8]; + let pos_values = vec![0, 1, 3, 5, 6, 8, 1022, 1023]; + + let file_path_col = Arc::new(StringArray::from_iter_values(file_path_values)); + let pos_col = Arc::new(Int64Array::from_iter_values(pos_values)); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + for n in 1..=3 { + let positional_deletes_to_write = + RecordBatch::try_new(positional_delete_schema.clone(), vec![ + file_path_col.clone(), + pos_col.clone(), + ]) + .unwrap(); + + let file = File::create(format!( + "{}/pos-del-{}.parquet", + table_location.to_str().unwrap(), + n + )) + .unwrap(); + let mut writer = ArrowWriter::try_new( + file, + positional_deletes_to_write.schema(), + Some(props.clone()), + ) + .unwrap(); + + writer + .write(&positional_deletes_to_write) + .expect("Writing batch"); + + // writer must be closed to write footer + writer.close().unwrap(); + } + + let pos_del_1 = FileScanTaskDeleteFile { + file_path: format!("{}/pos-del-1.parquet", table_location.to_str().unwrap()), + file_type: DataContentType::PositionDeletes, + partition_spec_id: 0, + equality_ids: vec![], + }; + + let pos_del_2 = FileScanTaskDeleteFile { + file_path: format!("{}/pos-del-2.parquet", table_location.to_str().unwrap()), + file_type: DataContentType::PositionDeletes, + partition_spec_id: 0, + equality_ids: vec![], + }; + + let pos_del_3 = FileScanTaskDeleteFile { + file_path: format!("{}/pos-del-3.parquet", table_location.to_str().unwrap()), + file_type: DataContentType::PositionDeletes, + partition_spec_id: 0, + equality_ids: vec![], + }; + + let file_scan_tasks = vec![ + FileScanTask { + start: 0, + length: 0, + record_count: None, + data_file_path: "".to_string(), + data_file_content: DataContentType::Data, + data_file_format: DataFileFormat::Parquet, + schema: data_file_schema.clone(), + project_field_ids: vec![], + predicate: None, + deletes: vec![pos_del_1, pos_del_2.clone()], + }, + FileScanTask { + start: 0, + length: 0, + record_count: None, + data_file_path: "".to_string(), + data_file_content: DataContentType::Data, + data_file_format: DataFileFormat::Parquet, + schema: data_file_schema.clone(), + project_field_ids: vec![], + predicate: None, + deletes: vec![pos_del_2, pos_del_3], + }, + ]; + + file_scan_tasks + } + + pub(crate) fn create_pos_del_schema() -> ArrowSchemaRef { + let fields = vec![ + arrow_schema::Field::new("file_path", arrow_schema::DataType::Utf8, false) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(), + )])), + arrow_schema::Field::new("pos", arrow_schema::DataType::Int64, false).with_metadata( + HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + FIELD_ID_POSITIONAL_DELETE_POS.to_string(), + )]), + ), + ]; + Arc::new(arrow_schema::Schema::new(fields)) + } +} diff --git a/crates/iceberg/src/arrow/delete_file_manager.rs 
b/crates/iceberg/src/arrow/delete_file_manager.rs deleted file mode 100644 index e1ca476793..0000000000 --- a/crates/iceberg/src/arrow/delete_file_manager.rs +++ /dev/null @@ -1,95 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::sync::Arc; - -use crate::delete_vector::DeleteVector; -use crate::expr::BoundPredicate; -use crate::io::FileIO; -use crate::scan::{ArrowRecordBatchStream, FileScanTaskDeleteFile}; -use crate::spec::SchemaRef; -use crate::{Error, ErrorKind, Result}; - -#[allow(unused)] -pub trait DeleteFileManager { - /// Read the delete file referred to in the task - /// - /// Returns the raw contents of the delete file as a RecordBatch stream - fn read_delete_file(task: &FileScanTaskDeleteFile) -> Result; -} - -#[allow(unused)] -#[derive(Clone, Debug)] -pub(crate) struct CachingDeleteFileManager { - file_io: FileIO, - concurrency_limit_data_files: usize, -} - -impl DeleteFileManager for CachingDeleteFileManager { - fn read_delete_file(_task: &FileScanTaskDeleteFile) -> Result { - // TODO, implementation in https://github.com/apache/iceberg-rust/pull/982 - - Err(Error::new( - ErrorKind::FeatureUnsupported, - "Reading delete files is not yet supported", - )) - } -} - -#[allow(unused_variables)] -impl CachingDeleteFileManager { - pub fn new(file_io: FileIO, concurrency_limit_data_files: usize) -> CachingDeleteFileManager { - Self { - file_io, - concurrency_limit_data_files, - } - } - - pub(crate) async fn load_deletes( - &self, - delete_file_entries: Vec, - ) -> Result<()> { - // TODO - - if !delete_file_entries.is_empty() { - Err(Error::new( - ErrorKind::FeatureUnsupported, - "Reading delete files is not yet supported", - )) - } else { - Ok(()) - } - } - - pub(crate) fn build_delete_predicate( - &self, - snapshot_schema: SchemaRef, - ) -> Result> { - // TODO - - Ok(None) - } - - pub(crate) fn get_positional_delete_indexes_for_data_file( - &self, - data_file_path: &str, - ) -> Option> { - // TODO - - None - } -} diff --git a/crates/iceberg/src/arrow/delete_filter.rs b/crates/iceberg/src/arrow/delete_filter.rs new file mode 100644 index 0000000000..e2acab1923 --- /dev/null +++ b/crates/iceberg/src/arrow/delete_filter.rs @@ -0,0 +1,353 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::collections::HashMap; +use std::sync::{Arc, Mutex, RwLock}; + +use tokio::sync::Notify; +use tokio::sync::oneshot::Receiver; + +use crate::delete_vector::DeleteVector; +use crate::expr::Predicate::AlwaysTrue; +use crate::expr::{Bind, BoundPredicate, Predicate}; +use crate::scan::{FileScanTask, FileScanTaskDeleteFile}; +use crate::spec::DataContentType; +use crate::{Error, ErrorKind, Result}; + +#[derive(Debug)] +enum EqDelState { + Loading(Arc), + Loaded(Predicate), +} + +#[derive(Debug, Default)] +struct DeleteFileFilterState { + delete_vectors: HashMap>>, + equality_deletes: HashMap, +} + +#[derive(Clone, Debug, Default)] +pub(crate) struct DeleteFilter { + state: Arc>, +} + +impl DeleteFilter { + /// Retrieve a delete vector for the data file associated with a given file scan task + pub(crate) fn get_delete_vector( + &self, + file_scan_task: &FileScanTask, + ) -> Option>> { + self.get_delete_vector_for_path(file_scan_task.data_file_path()) + } + + /// Retrieve a delete vector for a data file + pub(crate) fn get_delete_vector_for_path( + &self, + delete_file_path: &str, + ) -> Option>> { + self.state + .read() + .ok() + .and_then(|st| st.delete_vectors.get(delete_file_path).cloned()) + } + + pub(crate) fn try_start_eq_del_load(&self, file_path: &str) -> Option> { + let mut state = self.state.write().unwrap(); + + if !state.equality_deletes.contains_key(file_path) { + return None; + } + + let notifier = Arc::new(Notify::new()); + state + .equality_deletes + .insert(file_path.to_string(), EqDelState::Loading(notifier.clone())); + + Some(notifier) + } + + /// Retrieve the equality delete predicate for a given eq delete file path + pub(crate) async fn get_equality_delete_predicate_for_delete_file_path( + &self, + file_path: &str, + ) -> Option { + let notifier = { + match self.state.read().unwrap().equality_deletes.get(file_path) { + None => return None, + Some(EqDelState::Loading(notifier)) => notifier.clone(), + Some(EqDelState::Loaded(predicate)) => { + return Some(predicate.clone()); + } + } + }; + + notifier.notified().await; + + match self.state.read().unwrap().equality_deletes.get(file_path) { + Some(EqDelState::Loaded(predicate)) => Some(predicate.clone()), + _ => unreachable!("Cannot be any other state than loaded"), + } + } + + /// Builds eq delete predicate for the provided task. 
+ pub(crate) async fn build_equality_delete_predicate( + &self, + file_scan_task: &FileScanTask, + ) -> Result> { + // * Filter the task's deletes into just the Equality deletes + // * Retrieve the unbound predicate for each from self.state.equality_deletes + // * Logical-AND them all together to get a single combined `Predicate` + // * Bind the predicate to the task's schema to get a `BoundPredicate` + + let mut combined_predicate = AlwaysTrue; + for delete in &file_scan_task.deletes { + if !is_equality_delete(delete) { + continue; + } + + let Some(predicate) = self + .get_equality_delete_predicate_for_delete_file_path(&delete.file_path) + .await + else { + return Err(Error::new( + ErrorKind::Unexpected, + format!( + "Missing predicate for equality delete file '{}'", + delete.file_path + ), + )); + }; + + combined_predicate = combined_predicate.and(predicate); + } + + if combined_predicate == AlwaysTrue { + return Ok(None); + } + + // TODO: handle case-insensitive case + let bound_predicate = combined_predicate.bind(file_scan_task.schema.clone(), false)?; + Ok(Some(bound_predicate)) + } + + pub(crate) fn upsert_delete_vector( + &mut self, + data_file_path: String, + delete_vector: DeleteVector, + ) { + let mut state = self.state.write().unwrap(); + + let Some(entry) = state.delete_vectors.get_mut(&data_file_path) else { + state + .delete_vectors + .insert(data_file_path, Arc::new(Mutex::new(delete_vector))); + return; + }; + + *entry.lock().unwrap() |= delete_vector; + } + + pub(crate) fn insert_equality_delete( + &self, + delete_file_path: &str, + eq_del: Receiver, + ) { + let notify = Arc::new(Notify::new()); + { + let mut state = self.state.write().unwrap(); + state.equality_deletes.insert( + delete_file_path.to_string(), + EqDelState::Loading(notify.clone()), + ); + } + + let state = self.state.clone(); + let delete_file_path = delete_file_path.to_string(); + crate::runtime::spawn(async move { + let eq_del = eq_del.await.unwrap(); + { + let mut state = state.write().unwrap(); + state + .equality_deletes + .insert(delete_file_path, EqDelState::Loaded(eq_del)); + } + notify.notify_waiters(); + }); + } +} + +pub(crate) fn is_equality_delete(f: &FileScanTaskDeleteFile) -> bool { + matches!(f.file_type, DataContentType::EqualityDeletes) +} + +#[cfg(test)] +mod tests { + use std::fs::File; + use std::path::Path; + use std::sync::Arc; + + use arrow_array::{Int64Array, RecordBatch, StringArray}; + use arrow_schema::Schema as ArrowSchema; + use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY}; + use parquet::basic::Compression; + use parquet::file::properties::WriterProperties; + use tempfile::TempDir; + + use super::*; + use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader; + use crate::io::FileIO; + use crate::spec::{DataFileFormat, Schema}; + + type ArrowSchemaRef = Arc; + + const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546; + const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545; + + #[tokio::test] + async fn test_delete_file_manager_load_deletes() { + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path(); + let file_io = FileIO::from_path(table_location.as_os_str().to_str().unwrap()) + .unwrap() + .build() + .unwrap(); + + // Note that with the delete file parsing not yet in place, all we can test here is that + // the call to the loader fails with the expected FeatureUnsupportedError. 
+ let delete_file_manager = CachingDeleteFileLoader::new(file_io.clone(), 10); + + let file_scan_tasks = setup(table_location); + + let result = delete_file_manager + .load_deletes(&file_scan_tasks[0].deletes, file_scan_tasks[0].schema_ref()) + .await + .unwrap(); + + assert!(result.is_err_and(|e| e.kind() == ErrorKind::FeatureUnsupported)); + } + + fn setup(table_location: &Path) -> Vec { + let data_file_schema = Arc::new(Schema::builder().build().unwrap()); + let positional_delete_schema = create_pos_del_schema(); + + let file_path_values = vec![format!("{}/1.parquet", table_location.to_str().unwrap()); 8]; + let pos_values = vec![0, 1, 3, 5, 6, 8, 1022, 1023]; + + let file_path_col = Arc::new(StringArray::from_iter_values(file_path_values)); + let pos_col = Arc::new(Int64Array::from_iter_values(pos_values)); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + for n in 1..=3 { + let positional_deletes_to_write = + RecordBatch::try_new(positional_delete_schema.clone(), vec![ + file_path_col.clone(), + pos_col.clone(), + ]) + .unwrap(); + + let file = File::create(format!( + "{}/pos-del-{}.parquet", + table_location.to_str().unwrap(), + n + )) + .unwrap(); + let mut writer = ArrowWriter::try_new( + file, + positional_deletes_to_write.schema(), + Some(props.clone()), + ) + .unwrap(); + + writer + .write(&positional_deletes_to_write) + .expect("Writing batch"); + + // writer must be closed to write footer + writer.close().unwrap(); + } + + let pos_del_1 = FileScanTaskDeleteFile { + file_path: format!("{}/pos-del-1.parquet", table_location.to_str().unwrap()), + file_type: DataContentType::PositionDeletes, + partition_spec_id: 0, + equality_ids: vec![], + }; + + let pos_del_2 = FileScanTaskDeleteFile { + file_path: format!("{}/pos-del-2.parquet", table_location.to_str().unwrap()), + file_type: DataContentType::PositionDeletes, + partition_spec_id: 0, + equality_ids: vec![], + }; + + let pos_del_3 = FileScanTaskDeleteFile { + file_path: format!("{}/pos-del-3.parquet", table_location.to_str().unwrap()), + file_type: DataContentType::PositionDeletes, + partition_spec_id: 0, + equality_ids: vec![], + }; + + let file_scan_tasks = vec![ + FileScanTask { + start: 0, + length: 0, + record_count: None, + data_file_path: "".to_string(), + data_file_content: DataContentType::Data, + data_file_format: DataFileFormat::Parquet, + schema: data_file_schema.clone(), + project_field_ids: vec![], + predicate: None, + deletes: vec![pos_del_1, pos_del_2.clone()], + }, + FileScanTask { + start: 0, + length: 0, + record_count: None, + data_file_path: "".to_string(), + data_file_content: DataContentType::Data, + data_file_format: DataFileFormat::Parquet, + schema: data_file_schema.clone(), + project_field_ids: vec![], + predicate: None, + deletes: vec![pos_del_2, pos_del_3], + }, + ]; + + file_scan_tasks + } + + fn create_pos_del_schema() -> ArrowSchemaRef { + let fields = vec![ + arrow_schema::Field::new("file_path", arrow_schema::DataType::Utf8, false) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(), + )])), + arrow_schema::Field::new("pos", arrow_schema::DataType::Int64, false).with_metadata( + HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + FIELD_ID_POSITIONAL_DELETE_POS.to_string(), + )]), + ), + ]; + Arc::new(arrow_schema::Schema::new(fields)) + } +} diff --git a/crates/iceberg/src/arrow/mod.rs b/crates/iceberg/src/arrow/mod.rs index 56caeaf559..949f842412 100644 --- 
a/crates/iceberg/src/arrow/mod.rs +++ b/crates/iceberg/src/arrow/mod.rs @@ -22,12 +22,15 @@ pub use schema::*; mod nan_val_cnt_visitor; pub(crate) use nan_val_cnt_visitor::*; - -pub(crate) mod delete_file_manager; +pub(crate) mod caching_delete_file_loader; +/// Delete File loader +pub mod delete_file_loader; +pub(crate) mod delete_filter; mod reader; pub(crate) mod record_batch_projector; pub(crate) mod record_batch_transformer; mod value; + pub use reader::*; pub use value::*; diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index a3462edec6..b1903a94db 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -42,7 +42,7 @@ use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ParquetRecordBatchStreamBuilder, use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData}; use parquet::schema::types::{SchemaDescriptor, Type as ParquetType}; -use crate::arrow::delete_file_manager::CachingDeleteFileManager; +use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader; use crate::arrow::record_batch_transformer::RecordBatchTransformer; use crate::arrow::{arrow_schema_to_schema, get_arrow_datum}; use crate::delete_vector::DeleteVector; @@ -110,7 +110,7 @@ impl ArrowReaderBuilder { ArrowReader { batch_size: self.batch_size, file_io: self.file_io.clone(), - delete_file_manager: CachingDeleteFileManager::new( + delete_file_loader: CachingDeleteFileLoader::new( self.file_io.clone(), self.concurrency_limit_data_files, ), @@ -126,7 +126,7 @@ impl ArrowReaderBuilder { pub struct ArrowReader { batch_size: Option, file_io: FileIO, - delete_file_manager: CachingDeleteFileManager, + delete_file_loader: CachingDeleteFileLoader, /// the maximum number of data files that can be fetched at the same time concurrency_limit_data_files: usize, @@ -153,7 +153,7 @@ impl ArrowReader { task, batch_size, file_io, - self.delete_file_manager.clone(), + self.delete_file_loader.clone(), row_group_filtering_enabled, row_selection_enabled, ) @@ -167,26 +167,26 @@ impl ArrowReader { Ok(Box::pin(stream) as ArrowRecordBatchStream) } + #[allow(clippy::too_many_arguments)] async fn process_file_scan_task( task: FileScanTask, batch_size: Option, file_io: FileIO, - delete_file_manager: CachingDeleteFileManager, + delete_file_loader: CachingDeleteFileLoader, row_group_filtering_enabled: bool, row_selection_enabled: bool, ) -> Result { let should_load_page_index = (row_selection_enabled && task.predicate.is_some()) || !task.deletes.is_empty(); - // concurrently retrieve delete files and create RecordBatchStreamBuilder - let (_, mut record_batch_stream_builder) = try_join!( - delete_file_manager.load_deletes(task.deletes.clone()), - Self::create_parquet_record_batch_stream_builder( - &task.data_file_path, - file_io.clone(), - should_load_page_index, - ) - )?; + let delete_filter_rx = delete_file_loader.load_deletes(&task.deletes, task.schema.clone()); + + let mut record_batch_stream_builder = Self::create_parquet_record_batch_stream_builder( + &task.data_file_path, + file_io.clone(), + should_load_page_index, + ) + .await?; // Create a projection mask for the batch stream to select which columns in the // Parquet file that we want in the response @@ -208,7 +208,8 @@ impl ArrowReader { record_batch_stream_builder = record_batch_stream_builder.with_batch_size(batch_size); } - let delete_predicate = delete_file_manager.build_delete_predicate(task.schema.clone())?; + let delete_filter = delete_filter_rx.await.unwrap()?; + let 
delete_predicate = delete_filter.build_equality_delete_predicate(&task).await?; // In addition to the optional predicate supplied in the `FileScanTask`, // we also have an optional predicate resulting from equality delete files. @@ -276,15 +277,18 @@ impl ArrowReader { } } - let positional_delete_indexes = - delete_file_manager.get_positional_delete_indexes_for_data_file(&task.data_file_path); + let positional_delete_indexes = delete_filter.get_delete_vector(&task); if let Some(positional_delete_indexes) = positional_delete_indexes { - let delete_row_selection = Self::build_deletes_row_selection( - record_batch_stream_builder.metadata().row_groups(), - &selected_row_group_indices, - positional_delete_indexes.as_ref(), - )?; + let delete_row_selection = { + let positional_delete_indexes = positional_delete_indexes.lock().unwrap(); + + Self::build_deletes_row_selection( + record_batch_stream_builder.metadata().row_groups(), + &selected_row_group_indices, + &positional_delete_indexes, + ) + }?; // merge the row selection from the delete files with the row selection // from the filter predicate, if there is one from the filter predicate @@ -319,7 +323,7 @@ impl ArrowReader { Ok(Box::pin(record_batch_stream) as ArrowRecordBatchStream) } - async fn create_parquet_record_batch_stream_builder( + pub(crate) async fn create_parquet_record_batch_stream_builder( data_file_path: &str, file_io: FileIO, should_load_page_index: bool, @@ -1839,7 +1843,7 @@ message schema { /* cases to cover: * {skip|select} {first|intermediate|last} {one row|multiple rows} in - {first|imtermediate|last} {skipped|selected} row group + {first|intermediate|last} {skipped|selected} row group * row group selection disabled */ diff --git a/crates/iceberg/src/delete_file_index.rs b/crates/iceberg/src/delete_file_index.rs index 3f25bbda36..d8f7a872e1 100644 --- a/crates/iceberg/src/delete_file_index.rs +++ b/crates/iceberg/src/delete_file_index.rs @@ -16,29 +16,26 @@ // under the License. use std::collections::HashMap; -use std::future::Future; use std::ops::Deref; -use std::pin::Pin; use std::sync::{Arc, RwLock}; -use std::task::{Context, Poll}; use futures::StreamExt; use futures::channel::mpsc::{Sender, channel}; +use tokio::sync::Notify; use crate::runtime::spawn; use crate::scan::{DeleteFileContext, FileScanTaskDeleteFile}; use crate::spec::{DataContentType, DataFile, Struct}; -use crate::{Error, ErrorKind, Result}; /// Index of delete files -#[derive(Clone, Debug)] +#[derive(Debug, Clone)] pub(crate) struct DeleteFileIndex { state: Arc>, } #[derive(Debug)] enum DeleteFileIndexState { - Populating, + Populating(Arc), Populated(PopulatedDeleteFileIndex), } @@ -59,7 +56,10 @@ impl DeleteFileIndex { pub(crate) fn new() -> (DeleteFileIndex, Sender) { // TODO: what should the channel limit be? 
let (tx, rx) = channel(10); - let state = Arc::new(RwLock::new(DeleteFileIndexState::Populating)); + let notify = Arc::new(Notify::new()); + let state = Arc::new(RwLock::new(DeleteFileIndexState::Populating( + notify.clone(), + ))); let delete_file_stream = rx.boxed(); spawn({ @@ -69,8 +69,11 @@ impl DeleteFileIndex { let populated_delete_file_index = PopulatedDeleteFileIndex::new(delete_files); - let mut guard = state.write().unwrap(); - *guard = DeleteFileIndexState::Populated(populated_delete_file_index); + { + let mut guard = state.write().unwrap(); + *guard = DeleteFileIndexState::Populated(populated_delete_file_index); + } + notify.notify_waiters(); } }); @@ -78,17 +81,29 @@ impl DeleteFileIndex { } /// Gets all the delete files that apply to the specified data file. - /// - /// Returns a future that resolves to a Result> - pub(crate) fn get_deletes_for_data_file<'a>( + pub(crate) async fn get_deletes_for_data_file( &self, - data_file: &'a DataFile, + data_file: &DataFile, seq_num: Option, - ) -> DeletesForDataFile<'a> { - DeletesForDataFile { - state: self.state.clone(), - data_file, - seq_num, + ) -> Vec { + let notifier = { + let guard = self.state.read().unwrap(); + match *guard { + DeleteFileIndexState::Populating(ref notifier) => notifier.clone(), + DeleteFileIndexState::Populated(ref index) => { + return index.get_deletes_for_data_file(data_file, seq_num); + } + } + }; + + notifier.notified().await; + + let guard = self.state.read().unwrap(); + match guard.deref() { + DeleteFileIndexState::Populated(index) => { + index.get_deletes_for_data_file(data_file, seq_num) + } + _ => unreachable!("Cannot be any other state than loaded"), } } } @@ -99,7 +114,7 @@ impl PopulatedDeleteFileIndex { /// /// 1. The partition information is extracted from each delete file's manifest entry. /// 2. If the partition is empty and the delete file is not a positional delete, - /// it is added to the `global_delees` vector + /// it is added to the `global_deletes` vector /// 3. Otherwise, the delete file is added to one of two hash maps based on its content type. fn new(files: Vec) -> PopulatedDeleteFileIndex { let mut eq_deletes_by_partition: HashMap>> = @@ -193,26 +208,3 @@ impl PopulatedDeleteFileIndex { results } } - -/// Future for the `DeleteFileIndex::get_deletes_for_data_file` method -pub(crate) struct DeletesForDataFile<'a> { - state: Arc>, - data_file: &'a DataFile, - seq_num: Option, -} - -impl Future for DeletesForDataFile<'_> { - type Output = Result>; - - fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll { - match self.state.try_read() { - Ok(guard) => match guard.deref() { - DeleteFileIndexState::Populated(idx) => Poll::Ready(Ok( - idx.get_deletes_for_data_file(self.data_file, self.seq_num) - )), - _ => Poll::Pending, - }, - Err(err) => Poll::Ready(Err(Error::new(ErrorKind::Unexpected, err.to_string()))), - } - } -} diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs index 7bde3c43d0..e4ab74f108 100644 --- a/crates/iceberg/src/delete_vector.rs +++ b/crates/iceberg/src/delete_vector.rs @@ -15,11 +15,13 @@ // specific language governing permissions and limitations // under the License. 
+use std::ops::BitOrAssign; + use roaring::RoaringTreemap; use roaring::bitmap::Iter; use roaring::treemap::BitmapIter; -#[allow(unused)] +#[derive(Debug, Default)] pub struct DeleteVector { inner: RoaringTreemap, } @@ -103,3 +105,9 @@ impl DeleteVectorIterator<'_> { inner.bitmap_iter.advance_to(lo); } } + +impl BitOrAssign for DeleteVector { + fn bitor_assign(&mut self, other: Self) { + self.inner.bitor_assign(&other.inner); + } +} diff --git a/crates/iceberg/src/scan/context.rs b/crates/iceberg/src/scan/context.rs index 6bfb12b23a..703cbd01a6 100644 --- a/crates/iceberg/src/scan/context.rs +++ b/crates/iceberg/src/scan/context.rs @@ -45,7 +45,7 @@ pub(crate) struct ManifestFileContext { object_cache: Arc, snapshot_schema: SchemaRef, expression_evaluator_cache: Arc, - delete_file_index: Option, + delete_file_index: DeleteFileIndex, } /// Wraps a [`ManifestEntryRef`] alongside the objects that are needed @@ -58,7 +58,7 @@ pub(crate) struct ManifestEntryContext { pub bound_predicates: Option>, pub partition_spec_id: i32, pub snapshot_schema: SchemaRef, - pub delete_file_index: Option, + pub delete_file_index: DeleteFileIndex, } impl ManifestFileContext { @@ -105,16 +105,13 @@ impl ManifestEntryContext { /// consume this `ManifestEntryContext`, returning a `FileScanTask` /// created from it pub(crate) async fn into_file_scan_task(self) -> Result { - let deletes = if let Some(delete_file_index) = self.delete_file_index { - delete_file_index - .get_deletes_for_data_file( - self.manifest_entry.data_file(), - self.manifest_entry.sequence_number(), - ) - .await? - } else { - vec![] - }; + let deletes = self + .delete_file_index + .get_deletes_for_data_file( + self.manifest_entry.data_file(), + self.manifest_entry.sequence_number(), + ) + .await; Ok(FileScanTask { start: 0, @@ -188,7 +185,8 @@ impl PlanContext { &self, manifest_list: Arc, tx_data: Sender, - delete_file_idx_and_tx: Option<(DeleteFileIndex, Sender)>, + delete_file_idx: DeleteFileIndex, + delete_file_tx: Sender, ) -> Result> + 'static>> { let manifest_files = manifest_list.entries().iter(); @@ -196,16 +194,10 @@ impl PlanContext { let mut filtered_mfcs = vec![]; for manifest_file in manifest_files { - let (delete_file_idx, tx) = if manifest_file.content == ManifestContentType::Deletes { - let Some((delete_file_idx, tx)) = delete_file_idx_and_tx.as_ref() else { - continue; - }; - (Some(delete_file_idx.clone()), tx.clone()) + let tx = if manifest_file.content == ManifestContentType::Deletes { + delete_file_tx.clone() } else { - ( - delete_file_idx_and_tx.as_ref().map(|x| x.0.clone()), - tx_data.clone(), - ) + tx_data.clone() }; let partition_bound_predicate = if self.predicate.is_some() { @@ -233,7 +225,7 @@ impl PlanContext { manifest_file, partition_bound_predicate, tx, - delete_file_idx, + delete_file_idx.clone(), ); filtered_mfcs.push(Ok(mfc)); @@ -247,7 +239,7 @@ impl PlanContext { manifest_file: &ManifestFile, partition_filter: Option>, sender: Sender, - delete_file_index: Option, + delete_file_index: DeleteFileIndex, ) -> ManifestFileContext { let bound_predicates = if let (Some(ref partition_bound_predicate), Some(snapshot_bound_predicate)) = diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index 8280d43080..f7a43a6475 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -59,11 +59,6 @@ pub struct TableScanBuilder<'a> { concurrency_limit_manifest_files: usize, row_group_filtering_enabled: bool, row_selection_enabled: bool, - - // TODO: defaults to false for now whilst 
delete file processing - // is still being worked on but will switch to a default of true - // once this work is complete - delete_file_processing_enabled: bool, } impl<'a> TableScanBuilder<'a> { @@ -82,7 +77,6 @@ impl<'a> TableScanBuilder<'a> { concurrency_limit_manifest_files: num_cpus, row_group_filtering_enabled: true, row_selection_enabled: false, - delete_file_processing_enabled: false, } } @@ -189,17 +183,6 @@ impl<'a> TableScanBuilder<'a> { self } - /// Determines whether to enable delete file processing (currently disabled by default) - /// - /// When disabled, delete files are ignored. - pub fn with_delete_file_processing_enabled( - mut self, - delete_file_processing_enabled: bool, - ) -> Self { - self.delete_file_processing_enabled = delete_file_processing_enabled; - self - } - /// Build the table scan. pub fn build(self) -> Result { let snapshot = match self.snapshot_id { @@ -226,7 +209,6 @@ impl<'a> TableScanBuilder<'a> { concurrency_limit_manifest_files: self.concurrency_limit_manifest_files, row_group_filtering_enabled: self.row_group_filtering_enabled, row_selection_enabled: self.row_selection_enabled, - delete_file_processing_enabled: self.delete_file_processing_enabled, }); }; current_snapshot_id.clone() @@ -317,7 +299,6 @@ impl<'a> TableScanBuilder<'a> { concurrency_limit_manifest_files: self.concurrency_limit_manifest_files, row_group_filtering_enabled: self.row_group_filtering_enabled, row_selection_enabled: self.row_selection_enabled, - delete_file_processing_enabled: self.delete_file_processing_enabled, }) } } @@ -346,7 +327,6 @@ pub struct TableScan { row_group_filtering_enabled: bool, row_selection_enabled: bool, - delete_file_processing_enabled: bool, } impl TableScan { @@ -368,12 +348,7 @@ impl TableScan { // used to stream the results back to the caller let (file_scan_task_tx, file_scan_task_rx) = channel(concurrency_limit_manifest_entries); - let delete_file_idx_and_tx: Option<(DeleteFileIndex, Sender)> = - if self.delete_file_processing_enabled { - Some(DeleteFileIndex::new()) - } else { - None - }; + let (delete_file_idx, delete_file_tx) = DeleteFileIndex::new(); let manifest_list = plan_context.get_manifest_list().await?; @@ -383,9 +358,8 @@ impl TableScan { let manifest_file_contexts = plan_context.build_manifest_file_contexts( manifest_list, manifest_entry_data_ctx_tx, - delete_file_idx_and_tx.as_ref().map(|(delete_file_idx, _)| { - (delete_file_idx.clone(), manifest_entry_delete_ctx_tx) - }), + delete_file_idx.clone(), + manifest_entry_delete_ctx_tx, )?; let mut channel_for_manifest_error = file_scan_task_tx.clone(); @@ -404,34 +378,30 @@ impl TableScan { }); let mut channel_for_data_manifest_entry_error = file_scan_task_tx.clone(); + let mut channel_for_delete_manifest_entry_error = file_scan_task_tx.clone(); - if let Some((_, delete_file_tx)) = delete_file_idx_and_tx { - let mut channel_for_delete_manifest_entry_error = file_scan_task_tx.clone(); - - // Process the delete file [`ManifestEntry`] stream in parallel - spawn(async move { - let result = manifest_entry_delete_ctx_rx - .map(|me_ctx| Ok((me_ctx, delete_file_tx.clone()))) - .try_for_each_concurrent( - concurrency_limit_manifest_entries, - |(manifest_entry_context, tx)| async move { - spawn(async move { - Self::process_delete_manifest_entry(manifest_entry_context, tx) - .await - }) - .await - }, - ) - .await; + // Process the delete file [`ManifestEntry`] stream in parallel + spawn(async move { + let result = manifest_entry_delete_ctx_rx + .map(|me_ctx| Ok((me_ctx, delete_file_tx.clone()))) + 
.try_for_each_concurrent( + concurrency_limit_manifest_entries, + |(manifest_entry_context, tx)| async move { + spawn(async move { + Self::process_delete_manifest_entry(manifest_entry_context, tx).await + }) + .await + }, + ) + .await; - if let Err(error) = result { - let _ = channel_for_delete_manifest_entry_error - .send(Err(error)) - .await; - } - }) - .await; - } + if let Err(error) = result { + let _ = channel_for_delete_manifest_entry_error + .send(Err(error)) + .await; + } + }) + .await; // Process the data file [`ManifestEntry`] stream in parallel spawn(async move { diff --git a/crates/integration_tests/tests/shared_tests/read_positional_deletes.rs b/crates/integration_tests/tests/shared_tests/read_positional_deletes.rs index 43a50c65f5..34085eeee4 100644 --- a/crates/integration_tests/tests/shared_tests/read_positional_deletes.rs +++ b/crates/integration_tests/tests/shared_tests/read_positional_deletes.rs @@ -37,11 +37,7 @@ async fn test_read_table_with_positional_deletes() { .await .unwrap(); - let scan = table - .scan() - .with_delete_file_processing_enabled(true) - .build() - .unwrap(); + let scan = table.scan().build().unwrap(); println!("{:?}", scan); let plan: Vec<_> = scan
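
Note for reviewers, not part of the diff: `load_deletes` hands its result back by spawning the work and sending a `Result` through a `tokio::sync::oneshot` channel, and positional deletes that target the same data file are unioned with `|=` (the new `BitOrAssign for DeleteVector` delegates to `RoaringTreemap`). The following is a minimal standalone sketch of just those two mechanisms, assuming the `roaring` and `tokio` crates and plain `tokio::spawn` instead of `crate::runtime::spawn`; the file names and the `load_deletes_sketch` helper are made up for illustration.

```rust
use std::collections::HashMap;

use roaring::RoaringTreemap;
use tokio::sync::oneshot;

/// Spawn the "loading" work and hand the merged result back through a oneshot
/// channel, mirroring the shape of `CachingDeleteFileLoader::load_deletes`.
fn load_deletes_sketch() -> oneshot::Receiver<HashMap<String, RoaringTreemap>> {
    let (tx, rx) = oneshot::channel();
    tokio::spawn(async move {
        let mut merged: HashMap<String, RoaringTreemap> = HashMap::new();
        // Pretend these position lists came from two positional delete files
        // that both refer to the same data file.
        for positions in [vec![0u64, 1, 3], vec![3, 5, 1023]] {
            let vector: RoaringTreemap = positions.into_iter().collect();
            // Upsert-and-union, like `DeleteFilter::upsert_delete_vector`.
            *merged.entry("1.parquet".to_string()).or_default() |= vector;
        }
        // The receiver may have been dropped; ignore a failed send, as the loader does.
        let _ = tx.send(merged);
    });
    rx
}

#[tokio::main]
async fn main() {
    let merged = load_deletes_sketch().await.expect("loader task dropped");
    // {0, 1, 3} | {3, 5, 1023} leaves five distinct deleted positions.
    println!("{:?}", merged.get("1.parquet").map(|v| v.len()));
}
```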
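The same `Populating(Notify)` / `Populated` state machine now appears in both `DeleteFileIndex` and the `DeleteFilter` equality-delete map: a reader takes the notifier out of the shared state, awaits it, and re-reads once woken. Below is a rough standalone sketch of that pattern with hypothetical `Index`/`IndexState` types standing in for `DeleteFileIndex`; it additionally enables the waiter before a final re-check, since tokio's `notify_waiters` only wakes tasks that are already registered.

```rust
use std::sync::{Arc, RwLock};

use tokio::sync::Notify;

#[derive(Debug)]
enum IndexState {
    Populating(Arc<Notify>),
    Populated(Vec<String>),
}

#[derive(Clone, Debug)]
struct Index {
    state: Arc<RwLock<IndexState>>,
}

impl Index {
    fn new() -> (Self, Arc<Notify>) {
        let notify = Arc::new(Notify::new());
        let state = Arc::new(RwLock::new(IndexState::Populating(notify.clone())));
        (Index { state }, notify)
    }

    /// Return the contents once the background task has populated the index.
    async fn get(&self) -> Vec<String> {
        // Fast path: already populated. Otherwise take the notifier out of the state.
        let notify = {
            match &*self.state.read().unwrap() {
                IndexState::Populated(items) => return items.clone(),
                IndexState::Populating(notify) => notify.clone(),
            }
        };
        // Register as a waiter, re-check, then sleep. `notify_waiters` only wakes
        // tasks that are already registered, hence the `enable` plus re-check.
        let notified = notify.notified();
        tokio::pin!(notified);
        notified.as_mut().enable();
        if let IndexState::Populated(items) = &*self.state.read().unwrap() {
            return items.clone();
        }
        notified.await;
        match &*self.state.read().unwrap() {
            IndexState::Populated(items) => items.clone(),
            IndexState::Populating(_) => unreachable!("woken before population"),
        }
    }
}

#[tokio::main]
async fn main() {
    let (index, notify) = Index::new();
    let writer = index.clone();
    tokio::spawn(async move {
        // ... discover delete files here ...
        *writer.state.write().unwrap() =
            IndexState::Populated(vec!["eq-del-1.parquet".to_string()]);
        notify.notify_waiters();
    });
    println!("{:?}", index.get().await);
}
```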