diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 7d652d8b0..9ceadcac8 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -51,3 +51,5 @@ mod scan; pub mod expr; pub mod transaction; pub mod transform; + +pub mod writer; diff --git a/crates/iceberg/src/writer/file_writer/mod.rs b/crates/iceberg/src/writer/file_writer/mod.rs new file mode 100644 index 000000000..c8251fde7 --- /dev/null +++ b/crates/iceberg/src/writer/file_writer/mod.rs @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module contains the writers for the data file formats supported by iceberg: parquet, orc. + +use super::{CurrentFileStatus, DefaultOutput}; +use crate::Result; +use arrow_array::RecordBatch; +use futures::Future; + +/// File writer builder trait. +pub trait FileWriterBuilder: Send + Clone + 'static { + /// The associated file writer type. + type R: FileWriter; + /// Build the file writer. Consumes the builder and returns a future. + fn build(self) -> impl Future> + Send; +} + +/// A file writer focuses on writing record batches to a physical file format (such as parquet or orc). +pub trait FileWriter: Send + CurrentFileStatus + 'static { + /// Write a record batch to the file. Returns a future. 
+ fn write(&mut self, batch: &RecordBatch) -> impl Future> + Send; + /// Close the file writer. Consumes the writer and returns a future. + fn close(self) -> impl Future> + Send; +} diff --git a/crates/iceberg/src/writer/mod.rs b/crates/iceberg/src/writer/mod.rs new file mode 100644 index 000000000..ac79d7bd4 --- /dev/null +++ b/crates/iceberg/src/writer/mod.rs @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! The iceberg writer module. + +use crate::spec::DataFileBuilder; + +pub mod file_writer; + +type DefaultOutput = Vec; + +/// The current file status of an iceberg writer. It is implemented by writers which write a single +/// file. +pub trait CurrentFileStatus { + /// Get the current file path. + fn current_file_path(&self) -> String; + /// Get the current file row number. + fn current_row_num(&self) -> usize; + /// Get the written size of the current file. + fn current_written_size(&self) -> usize; +}