Skip to content

Commit 565abd9

Browse files
author
ZENOTME
committed
store current table instead of current metadata
1 parent eb12128 commit 565abd9

File tree

2 files changed

+64
-37
lines changed

2 files changed

+64
-37
lines changed

crates/iceberg/src/table.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ pub struct Table {
162162
}
163163

164164
impl Table {
165+
/// Replaces the metadata reference held by this table with `metadata`.
///
/// The table is mutated in place and nothing is returned; the previously
/// stored metadata reference is overwritten.
///
/// NOTE(review): despite the builder-style `with_` prefix, this behaves as a
/// plain setter (`&mut self`, no return value) — confirm the name is intended.
pub(crate) fn with_metadata(&mut self, metadata: TableMetadataRef) {
166+
self.metadata = metadata;
167+
}
168+
165169
/// Returns a TableBuilder to build a table
166170
pub fn builder() -> TableBuilder {
167171
TableBuilder::new()

crates/iceberg/src/transaction.rs

Lines changed: 60 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use std::collections::{HashMap, HashSet};
2222
use std::future::Future;
2323
use std::mem::discriminant;
2424
use std::ops::RangeFrom;
25+
use std::sync::Arc;
2526

2627
use arrow_array::StringArray;
2728
use futures::TryStreamExt;
@@ -32,8 +33,7 @@ use crate::io::OutputFile;
3233
use crate::spec::{
3334
DataFile, DataFileFormat, FormatVersion, ManifestEntry, ManifestFile, ManifestListWriter,
3435
ManifestWriterBuilder, NullOrder, Operation, Snapshot, SnapshotReference, SnapshotRetention,
35-
SortDirection, SortField, SortOrder, Struct, StructType, Summary, TableMetadata, Transform,
36-
MAIN_BRANCH,
36+
SortDirection, SortField, SortOrder, Struct, StructType, Summary, Transform, MAIN_BRANCH,
3737
};
3838
use crate::table::Table;
3939
use crate::writer::file_writer::ParquetWriter;
@@ -45,7 +45,7 @@ const META_ROOT_PATH: &str = "metadata";
4545
/// Table transaction.
4646
pub struct Transaction<'a> {
4747
base_table: &'a Table,
48-
current_metadata: TableMetadata,
48+
current_table: Table,
4949
updates: Vec<TableUpdate>,
5050
requirements: Vec<TableRequirement>,
5151
}
@@ -55,19 +55,20 @@ impl<'a> Transaction<'a> {
5555
pub fn new(table: &'a Table) -> Self {
5656
Self {
5757
base_table: table,
58-
current_metadata: table.metadata().clone(),
58+
current_table: table.clone(),
5959
updates: vec![],
6060
requirements: vec![],
6161
}
6262
}
6363

6464
fn update_table_metadata(&mut self, updates: &[TableUpdate]) -> Result<()> {
65-
let mut metadata_builder = self.current_metadata.clone().into_builder(None);
65+
let mut metadata_builder = self.current_table.metadata().clone().into_builder(None);
6666
for update in updates {
6767
metadata_builder = update.clone().apply(metadata_builder)?;
6868
}
6969

70-
self.current_metadata = metadata_builder.build()?.metadata;
70+
self.current_table
71+
.with_metadata(Arc::new(metadata_builder.build()?.metadata));
7172

7273
Ok(())
7374
}
@@ -78,7 +79,7 @@ impl<'a> Transaction<'a> {
7879
requirements: Vec<TableRequirement>,
7980
) -> Result<()> {
8081
for requirement in &requirements {
81-
requirement.check(Some(&self.current_metadata))?;
82+
requirement.check(Some(self.current_table.metadata()))?;
8283
}
8384

8485
self.update_table_metadata(&updates)?;
@@ -106,7 +107,7 @@ impl<'a> Transaction<'a> {
106107

107108
/// Sets table to a new version.
108109
pub fn upgrade_table_version(mut self, format_version: FormatVersion) -> Result<Self> {
109-
let current_version = self.current_metadata.format_version();
110+
let current_version = self.current_table.metadata().format_version();
110111
match current_version.cmp(&format_version) {
111112
Ordering::Greater => {
112113
return Err(Error::new(
@@ -145,7 +146,8 @@ impl<'a> Transaction<'a> {
145146
};
146147
let mut snapshot_id = generate_random_id();
147148
while self
148-
.current_metadata
149+
.current_table
150+
.metadata()
149151
.snapshots()
150152
.any(|s| s.snapshot_id() == snapshot_id)
151153
{
@@ -247,7 +249,8 @@ impl<'a> FastAppendAction<'a> {
247249
if !self
248250
.snapshot_produce_action
249251
.tx
250-
.current_metadata
252+
.current_table
253+
.metadata()
251254
.default_spec
252255
.is_unpartitioned()
253256
{
@@ -258,9 +261,9 @@ impl<'a> FastAppendAction<'a> {
258261
}
259262

260263
let data_files = ParquetWriter::parquet_files_to_data_files(
261-
self.snapshot_produce_action.tx.base_table.file_io(),
264+
self.snapshot_produce_action.tx.current_table.file_io(),
262265
file_path,
263-
&self.snapshot_produce_action.tx.current_metadata,
266+
self.snapshot_produce_action.tx.current_table.metadata(),
264267
)
265268
.await?;
266269

@@ -283,7 +286,7 @@ impl<'a> FastAppendAction<'a> {
283286
let mut manifest_stream = self
284287
.snapshot_produce_action
285288
.tx
286-
.base_table
289+
.current_table
287290
.inspect()
288291
.manifests()
289292
.scan()
@@ -345,14 +348,19 @@ impl SnapshotProduceOperation for FastAppendOperation {
345348
&self,
346349
snapshot_produce: &SnapshotProduceAction<'_>,
347350
) -> Result<Vec<ManifestFile>> {
348-
let Some(snapshot) = snapshot_produce.tx.current_metadata.current_snapshot() else {
351+
let Some(snapshot) = snapshot_produce
352+
.tx
353+
.current_table
354+
.metadata()
355+
.current_snapshot()
356+
else {
349357
return Ok(vec![]);
350358
};
351359

352360
let manifest_list = snapshot
353361
.load_manifest_list(
354-
snapshot_produce.tx.base_table.file_io(),
355-
&snapshot_produce.tx.current_metadata,
362+
snapshot_produce.tx.current_table.file_io(),
363+
snapshot_produce.tx.current_table.metadata(),
356364
)
357365
.await?;
358366

@@ -470,7 +478,7 @@ impl<'a> SnapshotProduceAction<'a> {
470478
}
471479
Self::validate_partition_value(
472480
data_file.partition(),
473-
self.tx.current_metadata.default_partition_type(),
481+
self.tx.current_table.metadata().default_partition_type(),
474482
)?;
475483
}
476484
self.added_data_files.extend(data_files);
@@ -480,20 +488,23 @@ impl<'a> SnapshotProduceAction<'a> {
480488
fn new_manifest_output(&mut self) -> Result<OutputFile> {
481489
let new_manifest_path = format!(
482490
"{}/{}/{}-m{}.{}",
483-
self.tx.current_metadata.location(),
491+
self.tx.current_table.metadata().location(),
484492
META_ROOT_PATH,
485493
self.commit_uuid,
486494
self.manifest_counter.next().unwrap(),
487495
DataFileFormat::Avro
488496
);
489-
self.tx.base_table.file_io().new_output(new_manifest_path)
497+
self.tx
498+
.current_table
499+
.file_io()
500+
.new_output(new_manifest_path)
490501
}
491502

492503
// Write manifest file for added data files and return the ManifestFile for ManifestList.
493504
async fn write_added_manifest(&mut self) -> Result<ManifestFile> {
494505
let added_data_files = std::mem::take(&mut self.added_data_files);
495506
let snapshot_id = self.snapshot_id;
496-
let format_version = self.tx.current_metadata.format_version();
507+
let format_version = self.tx.current_table.metadata().format_version();
497508
let manifest_entries = added_data_files.into_iter().map(|data_file| {
498509
let builder = ManifestEntry::builder()
499510
.status(crate::spec::ManifestStatus::Added)
@@ -511,14 +522,15 @@ impl<'a> SnapshotProduceAction<'a> {
511522
self.new_manifest_output()?,
512523
Some(self.snapshot_id),
513524
self.key_metadata.clone(),
514-
self.tx.current_metadata.current_schema().clone(),
525+
self.tx.current_table.metadata().current_schema().clone(),
515526
self.tx
516-
.current_metadata
527+
.current_table
528+
.metadata()
517529
.default_partition_spec()
518530
.as_ref()
519531
.clone(),
520532
);
521-
if self.tx.current_metadata.format_version() == FormatVersion::V1 {
533+
if self.tx.current_table.metadata().format_version() == FormatVersion::V1 {
522534
builder.build_v1()
523535
} else {
524536
builder.build_v2_data()
@@ -558,7 +570,7 @@ impl<'a> SnapshotProduceAction<'a> {
558570
fn generate_manifest_list_file_path(&self, attempt: i64) -> String {
559571
format!(
560572
"{}/{}/snap-{}-{}-{}.{}",
561-
self.tx.current_metadata.location(),
573+
self.tx.current_table.metadata().location(),
562574
META_ROOT_PATH,
563575
self.snapshot_id,
564576
attempt,
@@ -576,28 +588,28 @@ impl<'a> SnapshotProduceAction<'a> {
576588
let new_manifests = self
577589
.manifest_file(&snapshot_produce_operation, &process)
578590
.await?;
579-
let next_seq_num = self.tx.current_metadata.next_sequence_number();
591+
let next_seq_num = self.tx.current_table.metadata().next_sequence_number();
580592

581593
let summary = self.summary(&snapshot_produce_operation);
582594

583595
let manifest_list_path = self.generate_manifest_list_file_path(0);
584596

585-
let mut manifest_list_writer = match self.tx.current_metadata.format_version() {
597+
let mut manifest_list_writer = match self.tx.current_table.metadata().format_version() {
586598
FormatVersion::V1 => ManifestListWriter::v1(
587599
self.tx
588-
.base_table
600+
.current_table
589601
.file_io()
590602
.new_output(manifest_list_path.clone())?,
591603
self.snapshot_id,
592-
self.tx.current_metadata.current_snapshot_id(),
604+
self.tx.current_table.metadata().current_snapshot_id(),
593605
),
594606
FormatVersion::V2 => ManifestListWriter::v2(
595607
self.tx
596-
.base_table
608+
.current_table
597609
.file_io()
598610
.new_output(manifest_list_path.clone())?,
599611
self.snapshot_id,
600-
self.tx.current_metadata.current_snapshot_id(),
612+
self.tx.current_table.metadata().current_snapshot_id(),
601613
next_seq_num,
602614
),
603615
};
@@ -608,10 +620,10 @@ impl<'a> SnapshotProduceAction<'a> {
608620
let new_snapshot = Snapshot::builder()
609621
.with_manifest_list(manifest_list_path)
610622
.with_snapshot_id(self.snapshot_id)
611-
.with_parent_snapshot_id(self.tx.current_metadata.current_snapshot_id())
623+
.with_parent_snapshot_id(self.tx.current_table.metadata().current_snapshot_id())
612624
.with_sequence_number(next_seq_num)
613625
.with_summary(summary)
614-
.with_schema_id(self.tx.current_metadata.current_schema_id())
626+
.with_schema_id(self.tx.current_table.metadata().current_schema_id())
615627
.with_timestamp_ms(commit_ts)
616628
.build();
617629

@@ -630,11 +642,11 @@ impl<'a> SnapshotProduceAction<'a> {
630642
],
631643
vec![
632644
TableRequirement::UuidMatch {
633-
uuid: self.tx.current_metadata.uuid(),
645+
uuid: self.tx.current_table.metadata().uuid(),
634646
},
635647
TableRequirement::RefSnapshotIdMatch {
636648
r#ref: MAIN_BRANCH.to_string(),
637-
snapshot_id: self.tx.current_metadata.current_snapshot_id(),
649+
snapshot_id: self.tx.current_table.metadata().current_snapshot_id(),
638650
},
639651
],
640652
)?;
@@ -674,10 +686,20 @@ impl<'a> ReplaceSortOrderAction<'a> {
674686

675687
let requirements = vec![
676688
TableRequirement::CurrentSchemaIdMatch {
677-
current_schema_id: self.tx.current_metadata.current_schema().schema_id(),
689+
current_schema_id: self
690+
.tx
691+
.current_table
692+
.metadata()
693+
.current_schema()
694+
.schema_id(),
678695
},
679696
TableRequirement::DefaultSortOrderIdMatch {
680-
default_sort_order_id: self.tx.current_metadata.default_sort_order().order_id,
697+
default_sort_order_id: self
698+
.tx
699+
.current_table
700+
.metadata()
701+
.default_sort_order()
702+
.order_id,
681703
},
682704
];
683705

@@ -693,7 +715,8 @@ impl<'a> ReplaceSortOrderAction<'a> {
693715
) -> Result<Self> {
694716
let field_id = self
695717
.tx
696-
.current_metadata
718+
.current_table
719+
.metadata()
697720
.current_schema()
698721
.field_id_by_name(name)
699722
.ok_or_else(|| {

0 commit comments

Comments
 (0)