From ee18dcf133a41d4f116453d8372bb61fca246e9a Mon Sep 17 00:00:00 2001 From: ZENOTME Date: Thu, 18 Jan 2024 11:43:12 +0800 Subject: [PATCH 1/2] init file writer interface --- crates/iceberg/src/lib.rs | 2 ++ crates/iceberg/src/writer/file_writer/mod.rs | 38 ++++++++++++++++++++ crates/iceberg/src/writer/mod.rs | 35 ++++++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 crates/iceberg/src/writer/file_writer/mod.rs create mode 100644 crates/iceberg/src/writer/mod.rs diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 7d652d8b0..9ceadcac8 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -51,3 +51,5 @@ mod scan; pub mod expr; pub mod transaction; pub mod transform; + +pub mod writer; diff --git a/crates/iceberg/src/writer/file_writer/mod.rs b/crates/iceberg/src/writer/file_writer/mod.rs new file mode 100644 index 000000000..c1a9884e6 --- /dev/null +++ b/crates/iceberg/src/writer/file_writer/mod.rs @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module contains the writer for data file format supported by iceberg: parquet, orc. 
+ +use super::{CurrentFileStatus, DefaultOutput}; +use crate::Result; +use arrow_array::RecordBatch; + +/// File writer builder trait. +pub trait FileWriterBuilder<O = DefaultOutput>: Send + Clone + 'static { + /// The associated file writer type. + type R: FileWriter<O>; + /// Build file writer. + fn build(self) -> impl std::future::Future<Output = Result<Self::R>> + Send; +} + +/// File writer focus on writing record batch to different physical file format.(Such as parquet. orc) +pub trait FileWriter<O = DefaultOutput>: Send + 'static + CurrentFileStatus { + /// Write record batch to file. + fn write(&mut self, batch: &RecordBatch) -> impl std::future::Future<Output = Result<()>> + Send; + /// Close file writer. + fn close(self) -> impl std::future::Future<Output = Result<O>> + Send; +} diff --git a/crates/iceberg/src/writer/mod.rs b/crates/iceberg/src/writer/mod.rs new file mode 100644 index 000000000..ac79d7bd4 --- /dev/null +++ b/crates/iceberg/src/writer/mod.rs @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! The iceberg writer module. + +use crate::spec::DataFileBuilder; + +pub mod file_writer; + +type DefaultOutput = Vec<DataFileBuilder>; + +/// The current file status of iceberg writer. It implement for the writer which write a single +/// file. 
+pub trait CurrentFileStatus { + /// Get the current file path. + fn current_file_path(&self) -> String; + /// Get the current file row number. + fn current_row_num(&self) -> usize; + /// Get the current file written size. + fn current_written_size(&self) -> usize; +} From 0ad07424ffd91deb8ae8c07161dab1b43e0de209 Mon Sep 17 00:00:00 2001 From: ZENOTME Date: Thu, 18 Jan 2024 16:10:57 +0800 Subject: [PATCH 2/2] refine --- crates/iceberg/src/writer/file_writer/mod.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/iceberg/src/writer/file_writer/mod.rs b/crates/iceberg/src/writer/file_writer/mod.rs index c1a9884e6..c8251fde7 100644 --- a/crates/iceberg/src/writer/file_writer/mod.rs +++ b/crates/iceberg/src/writer/file_writer/mod.rs @@ -20,19 +20,20 @@ use super::{CurrentFileStatus, DefaultOutput}; use crate::Result; use arrow_array::RecordBatch; +use futures::Future; /// File writer builder trait. pub trait FileWriterBuilder<O = DefaultOutput>: Send + Clone + 'static { /// The associated file writer type. type R: FileWriter<O>; /// Build file writer. - fn build(self) -> impl std::future::Future<Output = Result<Self::R>> + Send; + fn build(self) -> impl Future<Output = Result<Self::R>> + Send; } /// File writer focus on writing record batch to different physical file format.(Such as parquet. orc) -pub trait FileWriter<O = DefaultOutput>: Send + 'static + CurrentFileStatus { +pub trait FileWriter<O = DefaultOutput>: Send + CurrentFileStatus + 'static { /// Write record batch to file. - fn write(&mut self, batch: &RecordBatch) -> impl std::future::Future<Output = Result<()>> + Send; + fn write(&mut self, batch: &RecordBatch) -> impl Future<Output = Result<()>> + Send; /// Close file writer. - fn close(self) -> impl std::future::Future<Output = Result<O>> + Send; + fn close(self) -> impl Future<Output = Result<O>> + Send; }