Skip to content

Commit 90115e0

Browse files
author
ZENOTME
committed
add delete file support for transaction
1 parent fe784ad commit 90115e0

File tree

1 file changed

+60
-20
lines changed

1 file changed

+60
-20
lines changed

crates/iceberg/src/transaction.rs

Lines changed: 60 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ use uuid::Uuid;
2828
use crate::error::Result;
2929
use crate::io::OutputFile;
3030
use crate::spec::{
31-
DataFile, DataFileFormat, FormatVersion, Manifest, ManifestEntry, ManifestFile,
32-
ManifestListWriter, ManifestMetadata, ManifestWriter, NullOrder, Operation, Snapshot,
33-
SnapshotReference, SnapshotRetention, SortDirection, SortField, SortOrder, Struct, StructType,
34-
Summary, Transform, MAIN_BRANCH,
31+
DataContentType, DataFile, DataFileFormat, FormatVersion, Manifest, ManifestContentType,
32+
ManifestEntry, ManifestFile, ManifestListWriter, ManifestMetadata, ManifestWriter, NullOrder,
33+
Operation, Snapshot, SnapshotReference, SnapshotRetention, SortDirection, SortField, SortOrder,
34+
Struct, StructType, Summary, Transform, MAIN_BRANCH,
3535
};
3636
use crate::table::Table;
3737
use crate::TableUpdate::UpgradeFormatVersion;
@@ -170,6 +170,17 @@ impl<'a> Transaction<'a> {
170170

171171
catalog.update_table(table_commit).await
172172
}
173+
174+
/// Commit the transaction through a dynamically dispatched catalog (`&dyn Catalog`).
175+
pub async fn commit_dyn(self, catalog: &dyn Catalog) -> Result<Table> {
176+
let table_commit = TableCommit::builder()
177+
.ident(self.table.identifier().clone())
178+
.updates(self.updates)
179+
.requirements(self.requirements)
180+
.build();
181+
182+
catalog.update_table(table_commit).await
183+
}
173184
}
174185

175186
/// FastAppendAction is a transaction action for fast-appending data files to the table.
@@ -284,6 +295,7 @@ struct SnapshotProduceAction<'a> {
284295
commit_uuid: Uuid,
285296
snapshot_properties: HashMap<String, String>,
286297
added_data_files: Vec<DataFile>,
298+
added_delete_files: Vec<DataFile>,
287299
// A counter used to generate unique manifest file names.
288300
// It starts from 0 and increments for each new manifest file.
289301
// Note: This counter is limited to the range of (0..u64::MAX).
@@ -304,6 +316,7 @@ impl<'a> SnapshotProduceAction<'a> {
304316
commit_uuid,
305317
snapshot_properties,
306318
added_data_files: vec![],
319+
added_delete_files: vec![],
307320
manifest_counter: (0..),
308321
key_metadata,
309322
})
@@ -335,7 +348,12 @@ impl<'a> SnapshotProduceAction<'a> {
335348
return Err(Error::new(
336349
ErrorKind::DataInvalid,
337350
"Partition value is not compatitable partition type",
338-
));
351+
)
352+
.with_context(
353+
"partition value",
354+
format!("{:?}", &value.as_primitive_literal().unwrap()),
355+
)
356+
.with_context("partition type", format!("{:?}", field.field_type)));
339357
}
340358
}
341359
Ok(())
@@ -347,13 +365,7 @@ impl<'a> SnapshotProduceAction<'a> {
347365
data_files: impl IntoIterator<Item = DataFile>,
348366
) -> Result<&mut Self> {
349367
let data_files: Vec<DataFile> = data_files.into_iter().collect();
350-
for data_file in &data_files {
351-
if data_file.content_type() != crate::spec::DataContentType::Data {
352-
return Err(Error::new(
353-
ErrorKind::DataInvalid,
354-
"Only data content type is allowed for fast append",
355-
));
356-
}
368+
for data_file in data_files {
357369
Self::validate_partition_value(
358370
data_file.partition(),
359371
self.tx
@@ -362,8 +374,12 @@ impl<'a> SnapshotProduceAction<'a> {
362374
.default_partition_spec()
363375
.partition_type(),
364376
)?;
377+
if data_file.content_type() == DataContentType::Data {
378+
self.added_data_files.push(data_file);
379+
} else {
380+
self.added_delete_files.push(data_file);
381+
}
365382
}
366-
self.added_data_files.extend(data_files);
367383
Ok(self)
368384
}
369385

@@ -380,8 +396,31 @@ impl<'a> SnapshotProduceAction<'a> {
380396
}
381397

382398
// Write a manifest file for the given added files (all data files or all delete files, never mixed) and return the ManifestFile for the ManifestList.
383-
async fn write_added_manifest(&mut self) -> Result<ManifestFile> {
384-
let added_data_files = std::mem::take(&mut self.added_data_files);
399+
async fn write_added_manifest(
400+
&mut self,
401+
added_data_files: Vec<DataFile>,
402+
) -> Result<ManifestFile> {
403+
let content_type = {
404+
let mut data_num = 0;
405+
let mut delete_num = 0;
406+
for f in &added_data_files {
407+
match f.content_type() {
408+
DataContentType::Data => data_num = data_num + 1,
409+
DataContentType::PositionDeletes => delete_num = delete_num + 1,
410+
DataContentType::EqualityDeletes => delete_num = delete_num + 1,
411+
}
412+
}
413+
if data_num == added_data_files.len() {
414+
ManifestContentType::Data
415+
} else if delete_num == added_data_files.len() {
416+
ManifestContentType::Deletes
417+
} else {
418+
return Err(Error::new(
419+
ErrorKind::DataInvalid,
420+
"added DataFile for a ManifestFile should be same type (Data or Delete)",
421+
));
422+
}
423+
};
385424
let manifest_entries = added_data_files
386425
.into_iter()
387426
.map(|data_file| {
@@ -410,7 +449,7 @@ impl<'a> SnapshotProduceAction<'a> {
410449
.as_ref()
411450
.clone(),
412451
)
413-
.content(crate::spec::ManifestContentType::Data)
452+
.content(content_type)
414453
.build();
415454
let manifest = Manifest::new(manifest_meta, manifest_entries);
416455
let writer = ManifestWriter::new(
@@ -426,12 +465,13 @@ impl<'a> SnapshotProduceAction<'a> {
426465
snapshot_produce_operation: &OP,
427466
manifest_process: &MP,
428467
) -> Result<Vec<ManifestFile>> {
429-
let added_manifest = self.write_added_manifest().await?;
468+
let data_files = std::mem::take(&mut self.added_data_files);
469+
let delete_files = std::mem::take(&mut self.added_delete_files);
470+
let added_manifest = self.write_added_manifest(data_files).await?;
471+
let added_delete_manifest = self.write_added_manifest(delete_files).await?;
430472
let existing_manifests = snapshot_produce_operation.existing_manifest(self).await?;
431-
// # TODO
432-
// Support process delete entries.
433473

434-
let mut manifest_files = vec![added_manifest];
474+
let mut manifest_files = vec![added_manifest, added_delete_manifest];
435475
manifest_files.extend(existing_manifests);
436476
let manifest_files = manifest_process.process_manifeset(manifest_files);
437477
Ok(manifest_files)

0 commit comments

Comments
 (0)