|
60 | 60 | from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
|
61 | 61 | from pyiceberg.schema import Schema
|
62 | 62 | from pyiceberg.serializers import FromInputFile
|
63 |
| -from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table, update_table_metadata |
| 63 | +from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table |
64 | 64 | from pyiceberg.table.metadata import new_table_metadata
|
65 | 65 | from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
|
66 | 66 | from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties
|
@@ -402,59 +402,83 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
|
402 | 402 | identifier_tuple = self.identifier_to_tuple_without_catalog(
|
403 | 403 | tuple(table_request.identifier.namespace.root + [table_request.identifier.name])
|
404 | 404 | )
|
405 |
| - current_table = self.load_table(identifier_tuple) |
406 | 405 | namespace_tuple = Catalog.namespace_from(identifier_tuple)
|
407 | 406 | namespace = Catalog.namespace_to_string(namespace_tuple)
|
408 | 407 | table_name = Catalog.table_name_from(identifier_tuple)
|
409 |
| - base_metadata = current_table.metadata |
410 |
| - for requirement in table_request.requirements: |
411 |
| - requirement.validate(base_metadata) |
412 | 408 |
|
413 |
| - updated_metadata = update_table_metadata(base_metadata, table_request.updates) |
414 |
| - if updated_metadata == base_metadata: |
415 |
| - # no changes, do nothing |
416 |
| - return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location) |
| 409 | + current_table: Optional[Table] |
| 410 | + try: |
| 411 | + current_table = self.load_table(identifier_tuple) |
| 412 | + except NoSuchTableError: |
| 413 | + current_table = None |
417 | 414 |
|
418 |
| - # write new metadata |
419 |
| - new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1 |
420 |
| - new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version) |
421 |
| - self._write_metadata(updated_metadata, current_table.io, new_metadata_location) |
| 415 | + updated_staged_table = self._update_and_stage_table(current_table, table_request) |
| 416 | + if current_table and updated_staged_table.metadata == current_table.metadata: |
| 417 | + # no changes, do nothing |
| 418 | + return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location) |
| 419 | + self._write_metadata( |
| 420 | + metadata=updated_staged_table.metadata, |
| 421 | + io=updated_staged_table.io, |
| 422 | + metadata_path=updated_staged_table.metadata_location, |
| 423 | + ) |
422 | 424 |
|
423 | 425 | with Session(self.engine) as session:
|
424 |
| - if self.engine.dialect.supports_sane_rowcount: |
425 |
| - stmt = ( |
426 |
| - update(IcebergTables) |
427 |
| - .where( |
428 |
| - IcebergTables.catalog_name == self.name, |
429 |
| - IcebergTables.table_namespace == namespace, |
430 |
| - IcebergTables.table_name == table_name, |
431 |
| - IcebergTables.metadata_location == current_table.metadata_location, |
432 |
| - ) |
433 |
| - .values(metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location) |
434 |
| - ) |
435 |
| - result = session.execute(stmt) |
436 |
| - if result.rowcount < 1: |
437 |
| - raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") |
438 |
| - else: |
439 |
| - try: |
440 |
| - tbl = ( |
441 |
| - session.query(IcebergTables) |
442 |
| - .with_for_update(of=IcebergTables) |
443 |
| - .filter( |
| 426 | + if current_table: |
| 427 | + # table exists, update it |
| 428 | + if self.engine.dialect.supports_sane_rowcount: |
| 429 | + stmt = ( |
| 430 | + update(IcebergTables) |
| 431 | + .where( |
444 | 432 | IcebergTables.catalog_name == self.name,
|
445 | 433 | IcebergTables.table_namespace == namespace,
|
446 | 434 | IcebergTables.table_name == table_name,
|
447 | 435 | IcebergTables.metadata_location == current_table.metadata_location,
|
448 | 436 | )
|
449 |
| - .one() |
| 437 | + .values( |
| 438 | + metadata_location=updated_staged_table.metadata_location, |
| 439 | + previous_metadata_location=current_table.metadata_location, |
| 440 | + ) |
450 | 441 | )
|
451 |
| - tbl.metadata_location = new_metadata_location |
452 |
| - tbl.previous_metadata_location = current_table.metadata_location |
453 |
| - except NoResultFound as e: |
454 |
| - raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") from e |
455 |
| - session.commit() |
| 442 | + result = session.execute(stmt) |
| 443 | + if result.rowcount < 1: |
| 444 | + raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") |
| 445 | + else: |
| 446 | + try: |
| 447 | + tbl = ( |
| 448 | + session.query(IcebergTables) |
| 449 | + .with_for_update(of=IcebergTables) |
| 450 | + .filter( |
| 451 | + IcebergTables.catalog_name == self.name, |
| 452 | + IcebergTables.table_namespace == namespace, |
| 453 | + IcebergTables.table_name == table_name, |
| 454 | + IcebergTables.metadata_location == current_table.metadata_location, |
| 455 | + ) |
| 456 | + .one() |
| 457 | + ) |
| 458 | + tbl.metadata_location = updated_staged_table.metadata_location |
| 459 | + tbl.previous_metadata_location = current_table.metadata_location |
| 460 | + except NoResultFound as e: |
| 461 | + raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") from e |
| 462 | + session.commit() |
| 463 | + else: |
| 464 | + # table does not exist, create it |
| 465 | + try: |
| 466 | + session.add( |
| 467 | + IcebergTables( |
| 468 | + catalog_name=self.name, |
| 469 | + table_namespace=namespace, |
| 470 | + table_name=table_name, |
| 471 | + metadata_location=updated_staged_table.metadata_location, |
| 472 | + previous_metadata_location=None, |
| 473 | + ) |
| 474 | + ) |
| 475 | + session.commit() |
| 476 | + except IntegrityError as e: |
| 477 | + raise TableAlreadyExistsError(f"Table {namespace}.{table_name} already exists") from e |
456 | 478 |
|
457 |
| - return CommitTableResponse(metadata=updated_metadata, metadata_location=new_metadata_location) |
| 479 | + return CommitTableResponse( |
| 480 | + metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location |
| 481 | + ) |
458 | 482 |
|
459 | 483 | def _namespace_exists(self, identifier: Union[str, Identifier]) -> bool:
|
460 | 484 | namespace_tuple = Catalog.identifier_to_tuple(identifier)
|
|
0 commit comments