diff --git a/.asf.yaml b/.asf.yaml index b1f557e903..209b722893 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -45,6 +45,7 @@ github: collaborators: # Note: the number of collaborators is limited to 10 - ajantha-bhat - syun64 + - kevinjqliu ghp_branch: gh-pages ghp_path: / diff --git a/.github/workflows/python-release.yml b/.github/workflows/python-release.yml index 54446049a4..b8d9b5dae3 100644 --- a/.github/workflows/python-release.yml +++ b/.github/workflows/python-release.yml @@ -59,7 +59,7 @@ jobs: if: startsWith(matrix.os, 'ubuntu') - name: Build wheels - uses: pypa/cibuildwheel@v2.17.0 + uses: pypa/cibuildwheel@v2.18.1 with: output-dir: wheelhouse config-file: "pyproject.toml" diff --git a/mkdocs/docs/api.md b/mkdocs/docs/api.md index 0bc23fb0dc..70b5fd62eb 100644 --- a/mkdocs/docs/api.md +++ b/mkdocs/docs/api.md @@ -606,6 +606,56 @@ min_snapshots_to_keep: [[null,10]] max_snapshot_age_in_ms: [[null,604800000]] ``` +### Manifests + +To show a table's current file manifests: + +```python +table.inspect.manifests() +``` + +``` +pyarrow.Table +content: int8 not null +path: string not null +length: int64 not null +partition_spec_id: int32 not null +added_snapshot_id: int64 not null +added_data_files_count: int32 not null +existing_data_files_count: int32 not null +deleted_data_files_count: int32 not null +added_delete_files_count: int32 not null +existing_delete_files_count: int32 not null +deleted_delete_files_count: int32 not null +partition_summaries: list<item: struct<contains_null: bool, contains_nan: bool, lower_bound: string, upper_bound: string>> not null + child 0, item: struct<contains_null: bool, contains_nan: bool, lower_bound: string, upper_bound: string> + child 0, contains_null: bool not null + child 1, contains_nan: bool + child 2, lower_bound: string + child 3, upper_bound: string +---- +content: [[0]] +path: [["s3://warehouse/default/table_metadata_manifests/metadata/3bf5b4c6-a7a4-4b43-a6ce-ca2b4887945a-m0.avro"]] +length: [[6886]] +partition_spec_id: [[0]] +added_snapshot_id: [[3815834705531553721]] +added_data_files_count: [[1]] +existing_data_files_count: [[0]] +deleted_data_files_count: [[0]] +added_delete_files_count: [[0]] +existing_delete_files_count: [[0]] +deleted_delete_files_count: [[0]] +partition_summaries: [[ -- is_valid: all not null + -- child 0 type: bool +[false] + -- child 1 type: bool +[false] + -- child 2 type: string +["test"] + -- child 3 type: string +["test"]]] +``` + ## Add Files Expert Iceberg users may choose to commit existing parquet files to the Iceberg table as data files, without rewriting them. diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 1ca071f009..f8a69119c8 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -89,6 +89,7 @@ For the FileIO there are several configuration options available: | s3.access-key-id | admin | Configure the static secret access key used to access the FileIO. | | s3.secret-access-key | password | Configure the static session token used to access the FileIO. | | s3.signer | bearer | Configure the signature version of the FileIO. | +| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/v1/aws/s3/sign`. | | s3.region | us-west-2 | Sets the region of the bucket | | s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. | | s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. | @@ -298,4 +299,4 @@ PyIceberg uses multiple threads to parallelize operations. 
The number of workers # Backward Compatibility -Previous versions of Java (`<1.4.0`) implementations incorrectly assume the optional attribute `current-snapshot-id` to be a required attribute in TableMetadata. This means that if `current-snapshot-id` is missing in the metadata file (e.g. on table creation), the application will throw an exception without being able to load the table. This assumption has been corrected in more recent Iceberg versions. However, it is possible to force PyIceberg to create a table with a metadata file that will be compatible with previous versions. This can be configured by setting the `legacy-current-snapshot-id` entry as "True" in the configuration file, or by setting the `LEGACY_CURRENT_SNAPSHOT_ID` environment variable. Refer to the [PR discussion](https://github.com/apache/iceberg-python/pull/473) for more details on the issue +Previous versions of Java (`<1.4.0`) implementations incorrectly assume the optional attribute `current-snapshot-id` to be a required attribute in TableMetadata. This means that if `current-snapshot-id` is missing in the metadata file (e.g. on table creation), the application will throw an exception without being able to load the table. This assumption has been corrected in more recent Iceberg versions. However, it is possible to force PyIceberg to create a table with a metadata file that will be compatible with previous versions. This can be configured by setting the `legacy-current-snapshot-id` entry as "True" in the configuration file, or by setting the `PYICEBERG_LEGACY_CURRENT_SNAPSHOT_ID` environment variable. Refer to the [PR discussion](https://github.com/apache/iceberg-python/pull/473) for more details on the issue diff --git a/mkdocs/requirements.txt b/mkdocs/requirements.txt index 83a067ce8f..22ded02b4c 100644 --- a/mkdocs/requirements.txt +++ b/mkdocs/requirements.txt @@ -16,13 +16,13 @@ # under the License. 
mkdocs==1.6.0 -griffe==0.44.0 -jinja2==3.1.3 -mkdocstrings==0.25.0 -mkdocstrings-python==1.10.0 +griffe==0.45.2 +jinja2==3.1.4 +mkdocstrings==0.25.1 +mkdocstrings-python==1.10.3 mkdocs-literate-nav==0.6.1 mkdocs-autorefs==1.0.1 mkdocs-gen-files==0.5.0 -mkdocs-material==9.5.20 +mkdocs-material==9.5.25 mkdocs-material-extensions==1.3.1 -mkdocs-section-index==0.3.8 +mkdocs-section-index==0.3.9 diff --git a/poetry.lock b/poetry.lock index 2821d1c687..adacbf7179 100644 --- a/poetry.lock +++ b/poetry.lock @@ -25,24 +25,24 @@ tests = ["arrow", "dask[dataframe]", "docker", "pytest", "pytest-mock"] [[package]] name = "aiobotocore" -version = "2.12.3" +version = "2.13.0" description = "Async client for aws services using botocore and aiohttp" optional = true python-versions = ">=3.8" files = [ - {file = "aiobotocore-2.12.3-py3-none-any.whl", hash = "sha256:86737685f4625e8f05c4e7a608a07cc97607263279f66cf6b02b640c4eafd324"}, - {file = "aiobotocore-2.12.3.tar.gz", hash = "sha256:e2a2929207bc5d62eb556106c2224c1fd106d5c65be2eb69f15cc8c34c44c236"}, + {file = "aiobotocore-2.13.0-py3-none-any.whl", hash = "sha256:f812afc678d71b0038fd1ce712ff111ab7f47bab81ce5b4c7d222d4b83bc0cb2"}, + {file = "aiobotocore-2.13.0.tar.gz", hash = "sha256:4badf5cab6ad400216319d14278e2c99ad9b708e28a0f231605a412e632de401"}, ] [package.dependencies] -aiohttp = ">=3.7.4.post0,<4.0.0" +aiohttp = ">=3.9.2,<4.0.0" aioitertools = ">=0.5.1,<1.0.0" -botocore = ">=1.34.41,<1.34.70" +botocore = ">=1.34.70,<1.34.107" wrapt = ">=1.10.10,<2.0.0" [package.extras] -awscli = ["awscli (>=1.32.41,<1.32.70)"] -boto3 = ["boto3 (>=1.34.41,<1.34.70)"] +awscli = ["awscli (>=1.32.70,<1.32.107)"] +boto3 = ["boto3 (>=1.34.70,<1.34.107)"] [[package]] name = "aiohttp" @@ -343,17 +343,17 @@ files = [ [[package]] name = "boto3" -version = "1.34.69" +version = "1.34.106" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.69-py3-none-any.whl", hash = "sha256:2e25ef6bd325217c2da329829478be063155897d8d3b29f31f7f23ab548519b1"}, - {file = "boto3-1.34.69.tar.gz", hash = "sha256:898a5fed26b1351352703421d1a8b886ef2a74be6c97d5ecc92432ae01fda203"}, + {file = "boto3-1.34.106-py3-none-any.whl", hash = "sha256:d3be4e1dd5d546a001cd4da805816934cbde9d395316546e9411fec341ade5cf"}, + {file = "boto3-1.34.106.tar.gz", hash = "sha256:6165b8cf1c7e625628ab28b32f9027064c8f5e5fca1c38d7fc228cd22069a19f"}, ] [package.dependencies] -botocore = ">=1.34.69,<1.35.0" +botocore = ">=1.34.106,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -362,13 +362,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.69" +version = "1.34.106" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.69-py3-none-any.whl", hash = "sha256:d3802d076d4d507bf506f9845a6970ce43adc3d819dd57c2791f5c19ed6e5950"}, - {file = "botocore-1.34.69.tar.gz", hash = "sha256:d1ab2bff3c2fd51719c2021d9fa2f30fbb9ed0a308f69e9a774ac92c8091380a"}, + {file = "botocore-1.34.106-py3-none-any.whl", hash = "sha256:4baf0e27c2dfc4f4d0dee7c217c716e0782f9b30e8e1fff983fce237d88f73ae"}, + {file = "botocore-1.34.106.tar.gz", hash = "sha256:921fa5202f88c3e58fdcb4b3acffd56d65b24bca47092ee4b27aa988556c0be6"}, ] [package.dependencies] @@ -380,7 +380,7 @@ urllib3 = [ ] [package.extras] -crt = ["awscrt (==0.19.19)"] +crt = ["awscrt (==0.20.9)"] [[package]] name = "build" @@ -652,63 +652,63 @@ files = [ [[package]] name = "coverage" -version = "7.5.0" +version = "7.5.3" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:432949a32c3e3f820af808db1833d6d1631664d53dd3ce487aa25d574e18ad1c"}, - {file = "coverage-7.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2bd7065249703cbeb6d4ce679c734bef0ee69baa7bff9724361ada04a15b7e3b"}, - {file = "coverage-7.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbfe6389c5522b99768a93d89aca52ef92310a96b99782973b9d11e80511f932"}, - {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39793731182c4be939b4be0cdecde074b833f6171313cf53481f869937129ed3"}, - {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85a5dbe1ba1bf38d6c63b6d2c42132d45cbee6d9f0c51b52c59aa4afba057517"}, - {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:357754dcdfd811462a725e7501a9b4556388e8ecf66e79df6f4b988fa3d0b39a"}, - {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a81eb64feded34f40c8986869a2f764f0fe2db58c0530d3a4afbcde50f314880"}, - {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:51431d0abbed3a868e967f8257c5faf283d41ec882f58413cf295a389bb22e58"}, - {file = "coverage-7.5.0-cp310-cp310-win32.whl", hash = "sha256:f609ebcb0242d84b7adeee2b06c11a2ddaec5464d21888b2c8255f5fd6a98ae4"}, - {file = "coverage-7.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:6782cd6216fab5a83216cc39f13ebe30adfac2fa72688c5a4d8d180cd52e8f6a"}, - {file = "coverage-7.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e768d870801f68c74c2b669fc909839660180c366501d4cc4b87efd6b0eee375"}, - {file = "coverage-7.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:84921b10aeb2dd453247fd10de22907984eaf80901b578a5cf0bb1e279a587cb"}, - {file = "coverage-7.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:710c62b6e35a9a766b99b15cdc56d5aeda0914edae8bb467e9c355f75d14ee95"}, - {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c379cdd3efc0658e652a14112d51a7668f6bfca7445c5a10dee7eabecabba19d"}, - {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fea9d3ca80bcf17edb2c08a4704259dadac196fe5e9274067e7a20511fad1743"}, - {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:41327143c5b1d715f5f98a397608f90ab9ebba606ae4e6f3389c2145410c52b1"}, - {file = 
"coverage-7.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:565b2e82d0968c977e0b0f7cbf25fd06d78d4856289abc79694c8edcce6eb2de"}, - {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cf3539007202ebfe03923128fedfdd245db5860a36810136ad95a564a2fdffff"}, - {file = "coverage-7.5.0-cp311-cp311-win32.whl", hash = "sha256:bf0b4b8d9caa8d64df838e0f8dcf68fb570c5733b726d1494b87f3da85db3a2d"}, - {file = "coverage-7.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c6384cc90e37cfb60435bbbe0488444e54b98700f727f16f64d8bfda0b84656"}, - {file = "coverage-7.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fed7a72d54bd52f4aeb6c6e951f363903bd7d70bc1cad64dd1f087980d309ab9"}, - {file = "coverage-7.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cbe6581fcff7c8e262eb574244f81f5faaea539e712a058e6707a9d272fe5b64"}, - {file = "coverage-7.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad97ec0da94b378e593ef532b980c15e377df9b9608c7c6da3506953182398af"}, - {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd4bacd62aa2f1a1627352fe68885d6ee694bdaebb16038b6e680f2924a9b2cc"}, - {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adf032b6c105881f9d77fa17d9eebe0ad1f9bfb2ad25777811f97c5362aa07f2"}, - {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ba01d9ba112b55bfa4b24808ec431197bb34f09f66f7cb4fd0258ff9d3711b1"}, - {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f0bfe42523893c188e9616d853c47685e1c575fe25f737adf473d0405dcfa7eb"}, - {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a9a7ef30a1b02547c1b23fa9a5564f03c9982fc71eb2ecb7f98c96d7a0db5cf2"}, - {file = "coverage-7.5.0-cp312-cp312-win32.whl", hash = "sha256:3c2b77f295edb9fcdb6a250f83e6481c679335ca7e6e4a955e4290350f2d22a4"}, - {file = "coverage-7.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:427e1e627b0963ac02d7c8730ca6d935df10280d230508c0ba059505e9233475"}, - {file = "coverage-7.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9dd88fce54abbdbf4c42fb1fea0e498973d07816f24c0e27a1ecaf91883ce69e"}, - {file = "coverage-7.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a898c11dca8f8c97b467138004a30133974aacd572818c383596f8d5b2eb04a9"}, - {file = "coverage-7.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07dfdd492d645eea1bd70fb1d6febdcf47db178b0d99161d8e4eed18e7f62fe7"}, - {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3d117890b6eee85887b1eed41eefe2e598ad6e40523d9f94c4c4b213258e4a4"}, - {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6afd2e84e7da40fe23ca588379f815fb6dbbb1b757c883935ed11647205111cb"}, - {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a9960dd1891b2ddf13a7fe45339cd59ecee3abb6b8326d8b932d0c5da208104f"}, - {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ced268e82af993d7801a9db2dbc1d2322e786c5dc76295d8e89473d46c6b84d4"}, - {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7c211f25777746d468d76f11719e64acb40eed410d81c26cefac641975beb88"}, - {file = "coverage-7.5.0-cp38-cp38-win32.whl", hash = 
"sha256:262fffc1f6c1a26125d5d573e1ec379285a3723363f3bd9c83923c9593a2ac25"}, - {file = "coverage-7.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:eed462b4541c540d63ab57b3fc69e7d8c84d5957668854ee4e408b50e92ce26a"}, - {file = "coverage-7.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d0194d654e360b3e6cc9b774e83235bae6b9b2cac3be09040880bb0e8a88f4a1"}, - {file = "coverage-7.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33c020d3322662e74bc507fb11488773a96894aa82a622c35a5a28673c0c26f5"}, - {file = "coverage-7.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbdf2cae14a06827bec50bd58e49249452d211d9caddd8bd80e35b53cb04631"}, - {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3235d7c781232e525b0761730e052388a01548bd7f67d0067a253887c6e8df46"}, - {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2de4e546f0ec4b2787d625e0b16b78e99c3e21bc1722b4977c0dddf11ca84e"}, - {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0e206259b73af35c4ec1319fd04003776e11e859936658cb6ceffdeba0f5be"}, - {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2055c4fb9a6ff624253d432aa471a37202cd8f458c033d6d989be4499aed037b"}, - {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:075299460948cd12722a970c7eae43d25d37989da682997687b34ae6b87c0ef0"}, - {file = "coverage-7.5.0-cp39-cp39-win32.whl", hash = "sha256:280132aada3bc2f0fac939a5771db4fbb84f245cb35b94fae4994d4c1f80dae7"}, - {file = "coverage-7.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:c58536f6892559e030e6924896a44098bc1290663ea12532c78cef71d0df8493"}, - {file = "coverage-7.5.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:2b57780b51084d5223eee7b59f0d4911c31c16ee5aa12737c7a02455829ff067"}, - {file = "coverage-7.5.0.tar.gz", hash = "sha256:cf62d17310f34084c59c01e027259076479128d11e4661bb6c9acb38c5e19bb8"}, + {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"}, + {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"}, + {file = "coverage-7.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:923b7b1c717bd0f0f92d862d1ff51d9b2b55dbbd133e05680204465f454bb286"}, + {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62bda40da1e68898186f274f832ef3e759ce929da9a9fd9fcf265956de269dbc"}, + {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8b7339180d00de83e930358223c617cc343dd08e1aa5ec7b06c3a121aec4e1d"}, + {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:25a5caf742c6195e08002d3b6c2dd6947e50efc5fc2c2205f61ecb47592d2d83"}, + {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:05ac5f60faa0c704c0f7e6a5cbfd6f02101ed05e0aee4d2822637a9e672c998d"}, + {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:239a4e75e09c2b12ea478d28815acf83334d32e722e7433471fbf641c606344c"}, + {file = "coverage-7.5.3-cp310-cp310-win32.whl", hash = "sha256:a5812840d1d00eafae6585aba38021f90a705a25b8216ec7f66aebe5b619fb84"}, + {file = "coverage-7.5.3-cp310-cp310-win_amd64.whl", hash = 
"sha256:33ca90a0eb29225f195e30684ba4a6db05dbef03c2ccd50b9077714c48153cac"}, + {file = "coverage-7.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f81bc26d609bf0fbc622c7122ba6307993c83c795d2d6f6f6fd8c000a770d974"}, + {file = "coverage-7.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cec2af81f9e7569280822be68bd57e51b86d42e59ea30d10ebdbb22d2cb7232"}, + {file = "coverage-7.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55f689f846661e3f26efa535071775d0483388a1ccfab899df72924805e9e7cd"}, + {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50084d3516aa263791198913a17354bd1dc627d3c1639209640b9cac3fef5807"}, + {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341dd8f61c26337c37988345ca5c8ccabeff33093a26953a1ac72e7d0103c4fb"}, + {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ab0b028165eea880af12f66086694768f2c3139b2c31ad5e032c8edbafca6ffc"}, + {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5bc5a8c87714b0c67cfeb4c7caa82b2d71e8864d1a46aa990b5588fa953673b8"}, + {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38a3b98dae8a7c9057bd91fbf3415c05e700a5114c5f1b5b0ea5f8f429ba6614"}, + {file = "coverage-7.5.3-cp311-cp311-win32.whl", hash = "sha256:fcf7d1d6f5da887ca04302db8e0e0cf56ce9a5e05f202720e49b3e8157ddb9a9"}, + {file = "coverage-7.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:8c836309931839cca658a78a888dab9676b5c988d0dd34ca247f5f3e679f4e7a"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"}, + {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = "sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"}, + {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"}, + {file = "coverage-7.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f78300789a708ac1f17e134593f577407d52d0417305435b134805c4fb135adb"}, + {file = "coverage-7.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b368e1aee1b9b75757942d44d7598dcd22a9dbb126affcbba82d15917f0cc155"}, + {file = 
"coverage-7.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f836c174c3a7f639bded48ec913f348c4761cbf49de4a20a956d3431a7c9cb24"}, + {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:244f509f126dc71369393ce5fea17c0592c40ee44e607b6d855e9c4ac57aac98"}, + {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c2872b3c91f9baa836147ca33650dc5c172e9273c808c3c3199c75490e709d"}, + {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dd4b3355b01273a56b20c219e74e7549e14370b31a4ffe42706a8cda91f19f6d"}, + {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f542287b1489c7a860d43a7d8883e27ca62ab84ca53c965d11dac1d3a1fab7ce"}, + {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:75e3f4e86804023e991096b29e147e635f5e2568f77883a1e6eed74512659ab0"}, + {file = "coverage-7.5.3-cp38-cp38-win32.whl", hash = "sha256:c59d2ad092dc0551d9f79d9d44d005c945ba95832a6798f98f9216ede3d5f485"}, + {file = "coverage-7.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:fa21a04112c59ad54f69d80e376f7f9d0f5f9123ab87ecd18fbb9ec3a2beed56"}, + {file = "coverage-7.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5102a92855d518b0996eb197772f5ac2a527c0ec617124ad5242a3af5e25f85"}, + {file = "coverage-7.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d1da0a2e3b37b745a2b2a678a4c796462cf753aebf94edcc87dcc6b8641eae31"}, + {file = "coverage-7.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8383a6c8cefba1b7cecc0149415046b6fc38836295bc4c84e820872eb5478b3d"}, + {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aad68c3f2566dfae84bf46295a79e79d904e1c21ccfc66de88cd446f8686341"}, + {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e079c9ec772fedbade9d7ebc36202a1d9ef7291bc9b3a024ca395c4d52853d7"}, + {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bde997cac85fcac227b27d4fb2c7608a2c5f6558469b0eb704c5726ae49e1c52"}, + {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:990fb20b32990b2ce2c5f974c3e738c9358b2735bc05075d50a6f36721b8f303"}, + {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3d5a67f0da401e105753d474369ab034c7bae51a4c31c77d94030d59e41df5bd"}, + {file = "coverage-7.5.3-cp39-cp39-win32.whl", hash = "sha256:e08c470c2eb01977d221fd87495b44867a56d4d594f43739a8028f8646a51e0d"}, + {file = "coverage-7.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:1d2a830ade66d3563bb61d1e3c77c8def97b30ed91e166c67d0632c018f380f0"}, + {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"}, + {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"}, ] [package.dependencies] @@ -885,69 +885,69 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "cython" -version = "3.0.8" +version = "3.0.10" description = "The Cython compiler for writing C extensions in the Python language." 
optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "Cython-3.0.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a846e0a38e2b24e9a5c5dc74b0e54c6e29420d88d1dafabc99e0fc0f3e338636"}, - {file = "Cython-3.0.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45523fdc2b78d79b32834cc1cc12dc2ca8967af87e22a3ee1bff20e77c7f5520"}, - {file = "Cython-3.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa0b7f3f841fe087410cab66778e2d3fb20ae2d2078a2be3dffe66c6574be39"}, - {file = "Cython-3.0.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e87294e33e40c289c77a135f491cd721bd089f193f956f7b8ed5aa2d0b8c558f"}, - {file = "Cython-3.0.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a1df7a129344b1215c20096d33c00193437df1a8fcca25b71f17c23b1a44f782"}, - {file = "Cython-3.0.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:13c2a5e57a0358da467d97667297bf820b62a1a87ae47c5f87938b9bb593acbd"}, - {file = "Cython-3.0.8-cp310-cp310-win32.whl", hash = "sha256:96b028f044f5880e3cb18ecdcfc6c8d3ce9d0af28418d5ab464509f26d8adf12"}, - {file = "Cython-3.0.8-cp310-cp310-win_amd64.whl", hash = "sha256:8140597a8b5cc4f119a1190f5a2228a84f5ca6d8d9ec386cfce24663f48b2539"}, - {file = "Cython-3.0.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aae26f9663e50caf9657148403d9874eea41770ecdd6caf381d177c2b1bb82ba"}, - {file = "Cython-3.0.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:547eb3cdb2f8c6f48e6865d5a741d9dd051c25b3ce076fbca571727977b28ac3"}, - {file = "Cython-3.0.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a567d4b9ba70b26db89d75b243529de9e649a2f56384287533cf91512705bee"}, - {file = "Cython-3.0.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:51d1426263b0e82fb22bda8ea60dc77a428581cc19e97741011b938445d383f1"}, - {file = "Cython-3.0.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c26daaeccda072459b48d211415fd1e5507c06bcd976fa0d5b8b9f1063467d7b"}, - {file = "Cython-3.0.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:289ce7838208211cd166e975865fd73b0649bf118170b6cebaedfbdaf4a37795"}, - {file = "Cython-3.0.8-cp311-cp311-win32.whl", hash = "sha256:c8aa05f5e17f8042a3be052c24f2edc013fb8af874b0bf76907d16c51b4e7871"}, - {file = "Cython-3.0.8-cp311-cp311-win_amd64.whl", hash = "sha256:000dc9e135d0eec6ecb2b40a5b02d0868a2f8d2e027a41b0fe16a908a9e6de02"}, - {file = "Cython-3.0.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:90d3fe31db55685d8cb97d43b0ec39ef614fcf660f83c77ed06aa670cb0e164f"}, - {file = "Cython-3.0.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e24791ddae2324e88e3c902a765595c738f19ae34ee66bfb1a6dac54b1833419"}, - {file = "Cython-3.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f020fa1c0552052e0660790b8153b79e3fc9a15dbd8f1d0b841fe5d204a6ae6"}, - {file = "Cython-3.0.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18bfa387d7a7f77d7b2526af69a65dbd0b731b8d941aaff5becff8e21f6d7717"}, - {file = "Cython-3.0.8-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:fe81b339cffd87c0069c6049b4d33e28bdd1874625ee515785bf42c9fdff3658"}, - {file = "Cython-3.0.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:80fd94c076e1e1b1ee40a309be03080b75f413e8997cddcf401a118879863388"}, - {file = 
"Cython-3.0.8-cp312-cp312-win32.whl", hash = "sha256:85077915a93e359a9b920280d214dc0cf8a62773e1f3d7d30fab8ea4daed670c"}, - {file = "Cython-3.0.8-cp312-cp312-win_amd64.whl", hash = "sha256:0cb2dcc565c7851f75d496f724a384a790fab12d1b82461b663e66605bec429a"}, - {file = "Cython-3.0.8-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:870d2a0a7e3cbd5efa65aecdb38d715ea337a904ea7bb22324036e78fb7068e7"}, - {file = "Cython-3.0.8-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e8f2454128974905258d86534f4fd4f91d2f1343605657ecab779d80c9d6d5e"}, - {file = "Cython-3.0.8-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1949d6aa7bc792554bee2b67a9fe41008acbfe22f4f8df7b6ec7b799613a4b3"}, - {file = "Cython-3.0.8-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9f2c6e1b8f3bcd6cb230bac1843f85114780bb8be8614855b1628b36bb510e0"}, - {file = "Cython-3.0.8-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:05d7eddc668ae7993643f32c7661f25544e791edb745758672ea5b1a82ecffa6"}, - {file = "Cython-3.0.8-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bfabe115deef4ada5d23c87bddb11289123336dcc14347011832c07db616dd93"}, - {file = "Cython-3.0.8-cp36-cp36m-win32.whl", hash = "sha256:0c38c9f0bcce2df0c3347285863621be904ac6b64c5792d871130569d893efd7"}, - {file = "Cython-3.0.8-cp36-cp36m-win_amd64.whl", hash = "sha256:6c46939c3983217d140999de7c238c3141f56b1ea349e47ca49cae899969aa2c"}, - {file = "Cython-3.0.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:115f0a50f752da6c99941b103b5cb090da63eb206abbc7c2ad33856ffc73f064"}, - {file = "Cython-3.0.8-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9c0f29246734561c90f36e70ed0506b61aa3d044e4cc4cba559065a2a741fae"}, - {file = "Cython-3.0.8-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ab75242869ff71e5665fe5c96f3378e79e792fa3c11762641b6c5afbbbbe026"}, - {file = "Cython-3.0.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6717c06e9cfc6c1df18543cd31a21f5d8e378a40f70c851fa2d34f0597037abc"}, - {file = "Cython-3.0.8-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:9d3f74388db378a3c6fd06e79a809ed98df3f56484d317b81ee762dbf3c263e0"}, - {file = "Cython-3.0.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ae7ac561fd8253a9ae96311e91d12af5f701383564edc11d6338a7b60b285a6f"}, - {file = "Cython-3.0.8-cp37-cp37m-win32.whl", hash = "sha256:97b2a45845b993304f1799664fa88da676ee19442b15fdcaa31f9da7e1acc434"}, - {file = "Cython-3.0.8-cp37-cp37m-win_amd64.whl", hash = "sha256:9e2be2b340fea46fb849d378f9b80d3c08ff2e81e2bfbcdb656e2e3cd8c6b2dc"}, - {file = "Cython-3.0.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2cde23c555470db3f149ede78b518e8274853745289c956a0e06ad8d982e4db9"}, - {file = "Cython-3.0.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7990ca127e1f1beedaf8fc8bf66541d066ef4723ad7d8d47a7cbf842e0f47580"}, - {file = "Cython-3.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b983c8e6803f016146c26854d9150ddad5662960c804ea7f0c752c9266752f0"}, - {file = "Cython-3.0.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a973268d7ca1a2bdf78575e459a94a78e1a0a9bb62a7db0c50041949a73b02ff"}, - {file = "Cython-3.0.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:61a237bc9dd23c7faef0fcfce88c11c65d0c9bb73c74ccfa408b3a012073c20e"}, - {file = 
"Cython-3.0.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3a3d67f079598af49e90ff9655bf85bd358f093d727eb21ca2708f467c489cae"}, - {file = "Cython-3.0.8-cp38-cp38-win32.whl", hash = "sha256:17a642bb01a693e34c914106566f59844b4461665066613913463a719e0dd15d"}, - {file = "Cython-3.0.8-cp38-cp38-win_amd64.whl", hash = "sha256:2cdfc32252f3b6dc7c94032ab744dcedb45286733443c294d8f909a4854e7f83"}, - {file = "Cython-3.0.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa97893d99385386925d00074654aeae3a98867f298d1e12ceaf38a9054a9bae"}, - {file = "Cython-3.0.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f05c0bf9d085c031df8f583f0d506aa3be1692023de18c45d0aaf78685bbb944"}, - {file = "Cython-3.0.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de892422582f5758bd8de187e98ac829330ec1007bc42c661f687792999988a7"}, - {file = "Cython-3.0.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:314f2355a1f1d06e3c431eaad4708cf10037b5e91e4b231d89c913989d0bdafd"}, - {file = "Cython-3.0.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:78825a3774211e7d5089730f00cdf7f473042acc9ceb8b9eeebe13ed3a5541de"}, - {file = "Cython-3.0.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:df8093deabc55f37028190cf5e575c26aad23fc673f34b85d5f45076bc37ce39"}, - {file = "Cython-3.0.8-cp39-cp39-win32.whl", hash = "sha256:1aca1b97e0095b3a9a6c33eada3f661a4ed0d499067d121239b193e5ba3bb4f0"}, - {file = "Cython-3.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:16873d78be63bd38ffb759da7ab82814b36f56c769ee02b1d5859560e4c3ac3c"}, - {file = "Cython-3.0.8-py2.py3-none-any.whl", hash = "sha256:171b27051253d3f9108e9759e504ba59ff06e7f7ba944457f94deaf9c21bf0b6"}, - {file = "Cython-3.0.8.tar.gz", hash = "sha256:8333423d8fd5765e7cceea3a9985dd1e0a5dfeb2734629e1a2ed2d6233d39de6"}, +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +files = [ + {file = "Cython-3.0.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e876272548d73583e90babda94c1299537006cad7a34e515a06c51b41f8657aa"}, + {file = "Cython-3.0.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:adc377aa33c3309191e617bf675fdbb51ca727acb9dc1aa23fc698d8121f7e23"}, + {file = "Cython-3.0.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:401aba1869a57aba2922ccb656a6320447e55ace42709b504c2f8e8b166f46e1"}, + {file = "Cython-3.0.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:541fbe725d6534a90b93f8c577eb70924d664b227a4631b90a6e0506d1469591"}, + {file = "Cython-3.0.10-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:86998b01f6a6d48398df8467292c7637e57f7e3a2ca68655367f13f66fed7734"}, + {file = "Cython-3.0.10-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d092c0ddba7e9e530a5c5be4ac06db8360258acc27675d1fc86294a5dc8994c5"}, + {file = "Cython-3.0.10-cp310-cp310-win32.whl", hash = "sha256:3cffb666e649dba23810732497442fb339ee67ba4e0be1f0579991e83fcc2436"}, + {file = "Cython-3.0.10-cp310-cp310-win_amd64.whl", hash = "sha256:9ea31184c7b3a728ef1f81fccb161d8948c05aa86c79f63b74fb6f3ddec860ec"}, + {file = "Cython-3.0.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:051069638abfb076900b0c2bcb6facf545655b3f429e80dd14365192074af5a4"}, + {file = "Cython-3.0.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:712760879600907189c7d0d346851525545484e13cd8b787e94bfd293da8ccf0"}, + {file = 
"Cython-3.0.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38d40fa1324ac47c04483d151f5e092406a147eac88a18aec789cf01c089c3f2"}, + {file = "Cython-3.0.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5bd49a3a9fdff65446a3e1c2bfc0ec85c6ce4c3cad27cd4ad7ba150a62b7fb59"}, + {file = "Cython-3.0.10-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e8df79b596633b8295eaa48b1157d796775c2bb078f32267d32f3001b687f2fd"}, + {file = "Cython-3.0.10-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bcc9795990e525c192bc5c0775e441d7d56d7a7d02210451e9e13c0448dba51b"}, + {file = "Cython-3.0.10-cp311-cp311-win32.whl", hash = "sha256:09f2000041db482cad3bfce94e1fa3a4c82b0e57390a164c02566cbbda8c4f12"}, + {file = "Cython-3.0.10-cp311-cp311-win_amd64.whl", hash = "sha256:3919a55ec9b6c7db6f68a004c21c05ed540c40dbe459ced5d801d5a1f326a053"}, + {file = "Cython-3.0.10-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8f2864ab5fcd27a346f0b50f901ebeb8f60b25a60a575ccfd982e7f3e9674914"}, + {file = "Cython-3.0.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:407840c56385b9c085826fe300213e0e76ba15d1d47daf4b58569078ecb94446"}, + {file = "Cython-3.0.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a036d00caa73550a3a976432ef21c1e3fa12637e1616aab32caded35331ae96"}, + {file = "Cython-3.0.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9cc6a0e7e23a96dec3f3c9d39690d4281beabd5297855140d0d30855f950275e"}, + {file = "Cython-3.0.10-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a5e14a8c6a8157d2b0cdc2e8e3444905d20a0e78e19d2a097e89fb8b04b51f6b"}, + {file = "Cython-3.0.10-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f8a2b8fa0fd8358bccb5f3304be563c4750aae175100463d212d5ea0ec74cbe0"}, + {file = "Cython-3.0.10-cp312-cp312-win32.whl", hash = "sha256:2d29e617fd23cf4b83afe8f93f2966566c9f565918ad1e86a4502fe825cc0a79"}, + {file = "Cython-3.0.10-cp312-cp312-win_amd64.whl", hash = "sha256:6c5af936940a38c300977b81598d9c0901158f220a58c177820e17e1774f1cf1"}, + {file = "Cython-3.0.10-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5f465443917d5c0f69825fca3b52b64c74ac3de0143b1fff6db8ba5b48c9fb4a"}, + {file = "Cython-3.0.10-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fadb84193c25641973666e583df8df4e27c52cdc05ddce7c6f6510d690ba34a"}, + {file = "Cython-3.0.10-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fa9e7786083b6aa61594c16979d621b62e61fcd9c2edd4761641b95c7fb34b2"}, + {file = "Cython-3.0.10-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4780d0f98ce28191c4d841c4358b5d5e79d96520650910cd59904123821c52d"}, + {file = "Cython-3.0.10-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:32fbad02d1189be75eb96456d9c73f5548078e5338d8fa153ecb0115b6ee279f"}, + {file = "Cython-3.0.10-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:90e2f514fc753b55245351305a399463103ec18666150bb1c36779b9862388e9"}, + {file = "Cython-3.0.10-cp36-cp36m-win32.whl", hash = "sha256:a9c976e9ec429539a4367cb4b24d15a1e46b925976f4341143f49f5f161171f5"}, + {file = "Cython-3.0.10-cp36-cp36m-win_amd64.whl", hash = "sha256:a9bb402674788a7f4061aeef8057632ec440123e74ed0fb425308a59afdfa10e"}, + {file = "Cython-3.0.10-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:206e803598010ecc3813db8748ed685f7beeca6c413f982df9f8a505fce56563"}, 
+ {file = "Cython-3.0.10-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15b6d397f4ee5ad54e373589522af37935a32863f1b23fa8c6922adf833e28e2"}, + {file = "Cython-3.0.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a181144c2f893ed8e6a994d43d0b96300bc99873f21e3b7334ca26c61c37b680"}, + {file = "Cython-3.0.10-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b74b700d6a793113d03fb54b63bdbadba6365379424bac7c0470605672769260"}, + {file = "Cython-3.0.10-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:076e9fd4e0ca33c5fa00a7479180dbfb62f17fe928e2909f82da814536e96d2b"}, + {file = "Cython-3.0.10-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:269f06e6961e8591d56e30b46e1a51b6ccb42cab04c29fa3b30d3e8723485fb4"}, + {file = "Cython-3.0.10-cp37-cp37m-win32.whl", hash = "sha256:d4e83a8ceff7af60064da4ccfce0ac82372544dd5392f1b350c34f1b04d0fae6"}, + {file = "Cython-3.0.10-cp37-cp37m-win_amd64.whl", hash = "sha256:40fac59c3a7fbcd9c25aea64c342c890a5e2270ce64a1525e840807800167799"}, + {file = "Cython-3.0.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f43a58bf2434870d2fc42ac2e9ff8138c9e00c6251468de279d93fa279e9ba3b"}, + {file = "Cython-3.0.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e9a885ec63d3955a08cefc4eec39fefa9fe14989c6e5e2382bd4aeb6bdb9bc3"}, + {file = "Cython-3.0.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acfbe0fff364d54906058fc61f2393f38cd7fa07d344d80923937b87e339adcf"}, + {file = "Cython-3.0.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8adcde00a8a88fab27509b558cd8c2959ab0c70c65d3814cfea8c68b83fa6dcd"}, + {file = "Cython-3.0.10-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2c9c1e3e78909488f3b16fabae02308423fa6369ed96ab1e250807d344cfffd7"}, + {file = "Cython-3.0.10-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fc6e0faf5b57523b073f0cdefadcaef3a51235d519a0594865925cadb3aeadf0"}, + {file = "Cython-3.0.10-cp38-cp38-win32.whl", hash = "sha256:35f6ede7c74024ed1982832ae61c9fad7cf60cc3f5b8c6a63bb34e38bc291936"}, + {file = "Cython-3.0.10-cp38-cp38-win_amd64.whl", hash = "sha256:950c0c7b770d2a7cec74fb6f5ccc321d0b51d151f48c075c0d0db635a60ba1b5"}, + {file = "Cython-3.0.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:077b61ee789e48700e25d4a16daa4258b8e65167136e457174df400cf9b4feab"}, + {file = "Cython-3.0.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f1f8bba9d8f37c0cffc934792b4ac7c42d0891077127c11deebe9fa0a0f7e4"}, + {file = "Cython-3.0.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:651a15a8534ebfb9b58cb0b87c269c70984b6f9c88bfe65e4f635f0e3f07dfcd"}, + {file = "Cython-3.0.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d10fc9aa82e5e53a0b7fd118f9771199cddac8feb4a6d8350b7d4109085aa775"}, + {file = "Cython-3.0.10-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4f610964ab252a83e573a427e28b103e2f1dd3c23bee54f32319f9e73c3c5499"}, + {file = "Cython-3.0.10-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8c9c4c4f3ab8f8c02817b0e16e8fa7b8cc880f76e9b63fe9c010e60c1a6c2b13"}, + {file = "Cython-3.0.10-cp39-cp39-win32.whl", hash = "sha256:0bac3ccdd4e03924028220c62ae3529e17efa8ca7e9df9330de95de02f582b26"}, + {file = "Cython-3.0.10-cp39-cp39-win_amd64.whl", hash = 
"sha256:81f356c1c8c0885b8435bfc468025f545c5d764aa9c75ab662616dd1193c331e"}, + {file = "Cython-3.0.10-py2.py3-none-any.whl", hash = "sha256:fcbb679c0b43514d591577fd0d20021c55c240ca9ccafbdb82d3fb95e5edfee2"}, + {file = "Cython-3.0.10.tar.gz", hash = "sha256:dcc96739331fb854dcf503f94607576cfe8488066c61ca50dfd55836f132de99"}, ] [[package]] @@ -1030,58 +1030,58 @@ files = [ [[package]] name = "duckdb" -version = "0.10.2" +version = "0.10.3" description = "DuckDB in-process database" optional = true python-versions = ">=3.7.0" files = [ - {file = "duckdb-0.10.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3891d3ac03e12a3e5c43afa3020fe701f64060f52d25f429a1ed7b5d914368d3"}, - {file = "duckdb-0.10.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4f63877651f1fb940e049dc53038eb763856616319acf4f892b1c3ed074f5ab0"}, - {file = "duckdb-0.10.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:06e3a36f04f4d98d2c0bbdd63e517cfbe114a795306e26ec855e62e076af5043"}, - {file = "duckdb-0.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf5f95ad5b75c8e65c6508b4df02043dd0b9d97712b9a33236ad77c388ce7861"}, - {file = "duckdb-0.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ff62bc98278c98fecbd6eecec5d698ad41ebd654110feaadbf8ac8bb59b1ecf"}, - {file = "duckdb-0.10.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cceede13fde095c23cf9a53adf7c414c7bfb21b9a7aa6a4836014fdbecbfca70"}, - {file = "duckdb-0.10.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:acdfff60b7efccd7f731213a9795851256249dfacf80367074b2b2e144f716dd"}, - {file = "duckdb-0.10.2-cp310-cp310-win_amd64.whl", hash = "sha256:4a5d5655cf0bdaf664a6f332afe465e02b08cef715548a0983bb7aef48da06a6"}, - {file = "duckdb-0.10.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a9d15842876d18763e085648656cccc7660a215d16254906db5c4471be2c7732"}, - {file = "duckdb-0.10.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c88cdcdc8452c910e4298223e7d9fca291534ff5aa36090aa49c9e6557550b13"}, - {file = "duckdb-0.10.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:364cd6f5dc8a1010d144d08c410ba9a74c521336ee5bda84fabc6616216a6d6a"}, - {file = "duckdb-0.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c57c11d1060296f5e9ebfb5bb7e5521e0d77912e8f9ff43c90240c3311e9de9"}, - {file = "duckdb-0.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:186d86b8dda8e1076170eb770bb2bb73ea88ca907d92885c9695d6515207b205"}, - {file = "duckdb-0.10.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f65b62f31c6bff21afc0261cfe28d238b8f34ec78f339546b12f4740c39552a"}, - {file = "duckdb-0.10.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a860d7466a5c93714cdd94559ce9e1db2ab91914f0941c25e5e93d4ebe36a5fa"}, - {file = "duckdb-0.10.2-cp311-cp311-win_amd64.whl", hash = "sha256:33308190e9c7f05a3a0a2d46008a043effd4eae77011869d7c18fb37acdd9215"}, - {file = "duckdb-0.10.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3a8b2f1229b4aecb79cd28ffdb99032b1497f0a805d0da1136a9b6115e1afc70"}, - {file = "duckdb-0.10.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d23a6dea61963733a0f45a0d0bbb1361fb2a47410ed5ff308b4a1f869d4eeb6f"}, - {file = "duckdb-0.10.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:20ee0aa27e688aa52a40b434ec41a50431d0b06edeab88edc2feaca18d82c62c"}, - {file = "duckdb-0.10.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:80a6d43d9044f0997a15a92e0c0ff3afd21151a1e572a92f439cc4f56b7090e1"}, - {file = "duckdb-0.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6934758cacd06029a5c9f54556a43bd277a86757e22bf8d0dd11ca15c1813d1c"}, - {file = "duckdb-0.10.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a11e2d68bd79044eea5486b1cddb5b915115f537e5c74eeb94c768ce30f9f4b"}, - {file = "duckdb-0.10.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0bf58385c43b8e448a2fea7e8729054934bf73ea616d1d7ef8184eda07f975e2"}, - {file = "duckdb-0.10.2-cp312-cp312-win_amd64.whl", hash = "sha256:eae75c7014597ded6e7f6dc51e32d48362a31608acd73e9f795748ee94335a54"}, - {file = "duckdb-0.10.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:62e89deff778a7a86f651802b947a3466425f6cce41e9d7d412d39e492932943"}, - {file = "duckdb-0.10.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f87e555fd36ec6da316b727a39fb24c53124a797dfa9b451bdea87b2f20a351f"}, - {file = "duckdb-0.10.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41e8b34b1a944590ebcf82f8cc59d67b084fe99479f048892d60da6c1402c386"}, - {file = "duckdb-0.10.2-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c68c6dde2773774cf2371522a3959ea2716fc2b3a4891d4066f0e426455fe19"}, - {file = "duckdb-0.10.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ff6a8a0980d0f9398fa461deffa59465dac190d707468478011ea8a5fe1f2c81"}, - {file = "duckdb-0.10.2-cp37-cp37m-win_amd64.whl", hash = "sha256:728dd4ff0efda387a424754e5508d4f8c72a272c2d3ccb036a83286f60b46002"}, - {file = "duckdb-0.10.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c461d6b4619e80170044a9eb999bbf4097e330d3a4974ced0a7eaeb79c7c39f6"}, - {file = "duckdb-0.10.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:909351ff72eb3b50b89761251148d8a186594d8a438e12dcf5494794caff6693"}, - {file = "duckdb-0.10.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d9eeb8393d69abafd355b869669957eb85b89e4df677e420b9ef0693b7aa6cb4"}, - {file = "duckdb-0.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3102bcf5011e8f82ea3c2bde43108774fe5a283a410d292c0843610ea13e2237"}, - {file = "duckdb-0.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d64d443613e5f16caf7d67102733538c90f7715867c1a98597efd3babca068e3"}, - {file = "duckdb-0.10.2-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cb31398826d1b7473344e5ee8e0f826370c9752549469ba1327042ace9041f80"}, - {file = "duckdb-0.10.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d09dcec467cd6127d5cc1fb0ce4efbd77e761882d9d772b0f64fc2f79a2a1cde"}, - {file = "duckdb-0.10.2-cp38-cp38-win_amd64.whl", hash = "sha256:82fab1a24faf7c33d8a7afed08b57ee36e8821a3a68a2f1574cd238ea440bba0"}, - {file = "duckdb-0.10.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38607e6e6618e8ea28c8d9b67aa9e22cfd6d6d673f2e8ab328bd6e867b697f69"}, - {file = "duckdb-0.10.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fb0c23bc8c09615bff38aebcf8e92e6ae74959c67b3c9e5b00edddc730bf22be"}, - {file = "duckdb-0.10.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:00576c11c78c83830ab483bad968e07cd9b5f730e7ffaf5aa5fadee5ac4f71e9"}, - {file = "duckdb-0.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:077db692cdda50c4684ef87dc2a68507665804caa90e539dbe819116bda722ad"}, - {file = "duckdb-0.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ca25984ad9f9a04e46e8359f852668c11569534e3bb8424b80be711303ad2314"}, - {file = "duckdb-0.10.2-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6a72cc40982c7b92cf555e574618fc711033b013bf258b611ba18d7654c89d8c"}, - {file = "duckdb-0.10.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27b9efd6e788eb561535fdc0cbc7c74aca1ff39f748b7cfc27aa49b00e22da1"}, - {file = "duckdb-0.10.2-cp39-cp39-win_amd64.whl", hash = "sha256:4800469489bc262dda61a7f1d40acedf67cf2454874e9d8bbf07920dc2b147e6"}, - {file = "duckdb-0.10.2.tar.gz", hash = "sha256:0f609c9d5f941f1ecde810f010dd9321cd406a552c1df20318a13fa64247f67f"}, + {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd25cc8d001c09a19340739ba59d33e12a81ab285b7a6bed37169655e1cefb31"}, + {file = "duckdb-0.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f9259c637b917ca0f4c63887e8d9b35ec248f5d987c886dfc4229d66a791009"}, + {file = "duckdb-0.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b48f5f1542f1e4b184e6b4fc188f497be8b9c48127867e7d9a5f4a3e334f88b0"}, + {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e327f7a3951ea154bb56e3fef7da889e790bd9a67ca3c36afc1beb17d3feb6d6"}, + {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d8b20ed67da004b4481973f4254fd79a0e5af957d2382eac8624b5c527ec48c"}, + {file = "duckdb-0.10.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d37680b8d7be04e4709db3a66c8b3eb7ceba2a5276574903528632f2b2cc2e60"}, + {file = "duckdb-0.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d34b86d6a2a6dfe8bb757f90bfe7101a3bd9e3022bf19dbddfa4b32680d26a9"}, + {file = "duckdb-0.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:73b1cb283ca0f6576dc18183fd315b4e487a545667ffebbf50b08eb4e8cdc143"}, + {file = "duckdb-0.10.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d917dde19fcec8cadcbef1f23946e85dee626ddc133e1e3f6551f15a61a03c61"}, + {file = "duckdb-0.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46757e0cf5f44b4cb820c48a34f339a9ccf83b43d525d44947273a585a4ed822"}, + {file = "duckdb-0.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:338c14d8ac53ac4aa9ec03b6f1325ecfe609ceeb72565124d489cb07f8a1e4eb"}, + {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:651fcb429602b79a3cf76b662a39e93e9c3e6650f7018258f4af344c816dab72"}, + {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3ae3c73b98b6215dab93cc9bc936b94aed55b53c34ba01dec863c5cab9f8e25"}, + {file = "duckdb-0.10.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56429b2cfe70e367fb818c2be19f59ce2f6b080c8382c4d10b4f90ba81f774e9"}, + {file = "duckdb-0.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b46c02c2e39e3676b1bb0dc7720b8aa953734de4fd1b762e6d7375fbeb1b63af"}, + {file = "duckdb-0.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:bcd460feef56575af2c2443d7394d405a164c409e9794a4d94cb5fdaa24a0ba4"}, + {file = "duckdb-0.10.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e229a7c6361afbb0d0ab29b1b398c10921263c52957aefe3ace99b0426fdb91e"}, + {file = "duckdb-0.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:732b1d3b6b17bf2f32ea696b9afc9e033493c5a3b783c292ca4b0ee7cc7b0e66"}, + {file = "duckdb-0.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f5380d4db11fec5021389fb85d614680dc12757ef7c5881262742250e0b58c75"}, + {file 
= "duckdb-0.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:468a4e0c0b13c55f84972b1110060d1b0f854ffeb5900a178a775259ec1562db"}, + {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fa1e7ff8d18d71defa84e79f5c86aa25d3be80d7cb7bc259a322de6d7cc72da"}, + {file = "duckdb-0.10.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed1063ed97c02e9cf2e7fd1d280de2d1e243d72268330f45344c69c7ce438a01"}, + {file = "duckdb-0.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:22f2aad5bb49c007f3bfcd3e81fdedbc16a2ae41f2915fc278724ca494128b0c"}, + {file = "duckdb-0.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:8f9e2bb00a048eb70b73a494bdc868ce7549b342f7ffec88192a78e5a4e164bd"}, + {file = "duckdb-0.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a6c2fc49875b4b54e882d68703083ca6f84b27536d57d623fc872e2f502b1078"}, + {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a66c125d0c30af210f7ee599e7821c3d1a7e09208196dafbf997d4e0cfcb81ab"}, + {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d99dd7a1d901149c7a276440d6e737b2777e17d2046f5efb0c06ad3b8cb066a6"}, + {file = "duckdb-0.10.3-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5ec3bbdb209e6095d202202893763e26c17c88293b88ef986b619e6c8b6715bd"}, + {file = "duckdb-0.10.3-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:2b3dec4ef8ed355d7b7230b40950b30d0def2c387a2e8cd7efc80b9d14134ecf"}, + {file = "duckdb-0.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:04129f94fb49bba5eea22f941f0fb30337f069a04993048b59e2811f52d564bc"}, + {file = "duckdb-0.10.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d75d67024fc22c8edfd47747c8550fb3c34fb1cbcbfd567e94939ffd9c9e3ca7"}, + {file = "duckdb-0.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f3796e9507c02d0ddbba2e84c994fae131da567ce3d9cbb4cbcd32fadc5fbb26"}, + {file = "duckdb-0.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:78e539d85ebd84e3e87ec44d28ad912ca4ca444fe705794e0de9be3dd5550c11"}, + {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a99b67ac674b4de32073e9bc604b9c2273d399325181ff50b436c6da17bf00a"}, + {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1209a354a763758c4017a1f6a9f9b154a83bed4458287af9f71d84664ddb86b6"}, + {file = "duckdb-0.10.3-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3b735cea64aab39b67c136ab3a571dbf834067f8472ba2f8bf0341bc91bea820"}, + {file = "duckdb-0.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:816ffb9f758ed98eb02199d9321d592d7a32a6cb6aa31930f4337eb22cfc64e2"}, + {file = "duckdb-0.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:1631184b94c3dc38b13bce4045bf3ae7e1b0ecbfbb8771eb8d751d8ffe1b59b3"}, + {file = "duckdb-0.10.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb98c35fc8dd65043bc08a2414dd9f59c680d7e8656295b8969f3f2061f26c52"}, + {file = "duckdb-0.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e75c9f5b6a92b2a6816605c001d30790f6d67ce627a2b848d4d6040686efdf9"}, + {file = "duckdb-0.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae786eddf1c2fd003466e13393b9348a44b6061af6fe7bcb380a64cac24e7df7"}, + {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9387da7b7973707b0dea2588749660dd5dd724273222680e985a2dd36787668"}, + {file = 
"duckdb-0.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:538f943bf9fa8a3a7c4fafa05f21a69539d2c8a68e557233cbe9d989ae232899"}, + {file = "duckdb-0.10.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6930608f35025a73eb94252964f9f19dd68cf2aaa471da3982cf6694866cfa63"}, + {file = "duckdb-0.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:03bc54a9cde5490918aad82d7d2a34290e3dfb78d5b889c6626625c0f141272a"}, + {file = "duckdb-0.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:372b6e3901d85108cafe5df03c872dfb6f0dbff66165a0cf46c47246c1957aa0"}, + {file = "duckdb-0.10.3.tar.gz", hash = "sha256:c5bd84a92bc708d3a6adffe1f554b94c6e76c795826daaaf482afc3d9c636971"}, ] [[package]] @@ -1185,13 +1185,13 @@ dotenv = ["python-dotenv"] [[package]] name = "flask-cors" -version = "4.0.0" +version = "4.0.1" description = "A Flask extension adding a decorator for CORS support" optional = false python-versions = "*" files = [ - {file = "Flask-Cors-4.0.0.tar.gz", hash = "sha256:f268522fcb2f73e2ecdde1ef45e2fd5c71cc48fe03cffb4b441c6d1b40684eb0"}, - {file = "Flask_Cors-4.0.0-py2.py3-none-any.whl", hash = "sha256:bc3492bfd6368d27cfe79c7821df5a8a319e1a6d5eab277a3794be19bdc51783"}, + {file = "Flask_Cors-4.0.1-py2.py3-none-any.whl", hash = "sha256:f2a704e4458665580c074b714c4627dd5a306b333deb9074d0b1794dfa2fb677"}, + {file = "flask_cors-4.0.1.tar.gz", hash = "sha256:eeb69b342142fdbf4766ad99357a7f3876a2ceb77689dc10ff912aac06c389e4"}, ] [package.dependencies] @@ -1344,17 +1344,17 @@ gcsfuse = ["fusepy"] [[package]] name = "getdaft" -version = "0.2.23" +version = "0.2.25" description = "Distributed Dataframes for Multimodal Data" optional = true -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "getdaft-0.2.23-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:a59f6084ca865528b26ed478d584f98c102500005314dbc7fc44b7c4b3e18d49"}, - {file = "getdaft-0.2.23-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:dfaf492bb453675999d70626a8fdb6d4ecaecafbf4a0548e68105757a7a4025a"}, - {file = "getdaft-0.2.23-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d6f4dbb7f3b5d62f8df1006bf55cc657148c2a3962766e62fbd3c2df337fa32"}, - {file = "getdaft-0.2.23-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9bfa567569a8b53e9b0a7ab3eb0044afe8d5499d995bfeb40bd867661bfa2aa7"}, - {file = "getdaft-0.2.23-cp37-abi3-win_amd64.whl", hash = "sha256:533b78abefa738cac97a6823ef2b8f2df3300bf2d4bda4e8336371fc2780bbb9"}, - {file = "getdaft-0.2.23.tar.gz", hash = "sha256:c2d66e6a4ce75aeb4cedbe2c04c18fa8f3f7dcfe2799f66211f36c7be2f835a5"}, + {file = "getdaft-0.2.25-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:7aab5bdf4af6b9bb0f7e0555cd36762d57da97ed026017f3a4b00f97bf5bf7f1"}, + {file = "getdaft-0.2.25-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:12a95f0ce9206c77a439ace0dc705d13acbe0e8278907ad2e57f62e0c01330ad"}, + {file = "getdaft-0.2.25-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cfeef90e2f446f65e0e7292431e5354995fe693cf9bbbd434dafd4b8971ea83"}, + {file = "getdaft-0.2.25-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b86a42e7310de613a0fb30d68a70ee0678e6605023e48a3c1dd28f8752d380e"}, + {file = "getdaft-0.2.25-cp38-abi3-win_amd64.whl", hash = "sha256:fbb3437e666478d06e661d961e5fd10b8cc33385bd2bafafcd22daf403fe6df1"}, + {file = "getdaft-0.2.25.tar.gz", hash = "sha256:60b2ca7d39447ba4b19eab6ccfd6fc706914ecf43d0080a13c832b013dda589b"}, ] [package.dependencies] @@ 
-2212,13 +2212,13 @@ test = ["mypy (>=1.0)", "pytest (>=7.0.0)"] [[package]] name = "moto" -version = "5.0.6" +version = "5.0.9" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "moto-5.0.6-py2.py3-none-any.whl", hash = "sha256:ca1e22831a741733b581ff2ef4d6ae2e1c6db1eab97af1b78b86ca2c6e88c609"}, - {file = "moto-5.0.6.tar.gz", hash = "sha256:ad8b23f2b555ad694da8b2432a42b6d96beaaf67a4e7d932196a72193a2eee2c"}, + {file = "moto-5.0.9-py2.py3-none-any.whl", hash = "sha256:21a13e02f83d6a18cfcd99949c96abb2e889f4bd51c4c6a3ecc8b78765cb854e"}, + {file = "moto-5.0.9.tar.gz", hash = "sha256:eb71f1cba01c70fff1f16086acb24d6d9aeb32830d646d8989f98a29aeae24ba"}, ] [package.dependencies] @@ -2237,7 +2237,7 @@ joserfc = {version = ">=0.9.0", optional = true, markers = "extra == \"server\"" jsondiff = {version = ">=1.1.2", optional = true, markers = "extra == \"server\""} jsonpath-ng = {version = "*", optional = true, markers = "extra == \"server\""} openapi-spec-validator = {version = ">=0.5.0", optional = true, markers = "extra == \"server\""} -py-partiql-parser = {version = "0.5.4", optional = true, markers = "extra == \"server\""} +py-partiql-parser = {version = "0.5.5", optional = true, markers = "extra == \"server\""} pyparsing = {version = ">=3.0.7", optional = true, markers = "extra == \"server\""} python-dateutil = ">=2.1,<3.0.0" PyYAML = {version = ">=5.1", optional = true, markers = "extra == \"server\""} @@ -2248,23 +2248,23 @@ werkzeug = ">=0.5,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1" xmltodict = "*" [package.extras] -all = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)", "setuptools"] +all = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)", "setuptools"] apigateway = ["PyYAML (>=5.1)", "joserfc (>=0.9.0)", "openapi-spec-validator (>=0.5.0)"] apigatewayv2 = ["PyYAML (>=5.1)", "openapi-spec-validator (>=0.5.0)"] appsync = ["graphql-core"] awslambda = ["docker (>=3.0.0)"] batch = ["docker (>=3.0.0)"] -cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)", "setuptools"] +cloudformation = ["PyYAML (>=5.1)", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)", "setuptools"] cognitoidp = ["joserfc (>=0.9.0)"] -dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.4)"] -dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.4)"] +dynamodb = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.5)"] +dynamodbstreams = ["docker (>=3.0.0)", "py-partiql-parser (==0.5.5)"] glue = ["pyparsing (>=3.0.7)"] iotdata = ["jsondiff (>=1.1.2)"] -proxy = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", 
"jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)", "setuptools"] -resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)"] -s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.5.4)"] -s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.5.4)"] -server = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.4)", "pyparsing (>=3.0.7)", "setuptools"] +proxy = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=2.5.1)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "multipart", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)", "setuptools"] +resourcegroupstaggingapi = ["PyYAML (>=5.1)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)"] +s3 = ["PyYAML (>=5.1)", "py-partiql-parser (==0.5.5)"] +s3crc32c = ["PyYAML (>=5.1)", "crc32c", "py-partiql-parser (==0.5.5)"] +server = ["PyYAML (>=5.1)", "antlr4-python3-runtime", "aws-xray-sdk (>=0.93,!=0.96)", "cfn-lint (>=0.40.0)", "docker (>=3.0.0)", "flask (!=2.2.0,!=2.2.1)", "flask-cors", "graphql-core", "joserfc (>=0.9.0)", "jsondiff (>=1.1.2)", "jsonpath-ng", "openapi-spec-validator (>=0.5.0)", "py-partiql-parser (==0.5.5)", "pyparsing (>=3.0.7)", "setuptools"] ssm = ["PyYAML (>=5.1)"] stepfunctions = ["antlr4-python3-runtime", "jsonpath-ng"] xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] @@ -2490,13 +2490,13 @@ files = [ [[package]] name = "mypy-boto3-glue" -version = "1.34.88" -description = "Type annotations for boto3.Glue 1.34.88 service generated with mypy-boto3-builder 7.23.2" +version = "1.34.110" +description = "Type annotations for boto3.Glue 1.34.110 service generated with mypy-boto3-builder 7.24.0" optional = true python-versions = ">=3.8" files = [ - {file = "mypy_boto3_glue-1.34.88-py3-none-any.whl", hash = "sha256:bb5c4ac3ac4806fb19ff3bebe2400635cf0d959e4a086a3de36b0eccbf04febc"}, - {file = "mypy_boto3_glue-1.34.88.tar.gz", hash = "sha256:7626368b66c92236f57008bf56303f3eda1ef2705ffe0d2cd845b1b877eb0596"}, + {file = "mypy_boto3_glue-1.34.110-py3-none-any.whl", hash = "sha256:795eca329426bf1ae3dc95090cccafcd7b3d91c4c594dac4db1fd9d6c72390c9"}, + {file = "mypy_boto3_glue-1.34.110.tar.gz", hash = "sha256:80d39849ac10ad9d57d85b94016fce8caba2cb70a3544b5b8b9bf0713ab3a041"}, ] [package.dependencies] @@ -2915,13 +2915,13 @@ files = [ [[package]] name = "py-partiql-parser" -version = "0.5.4" +version = "0.5.5" description = "Pure Python PartiQL Parser" optional = false python-versions = "*" files = [ - {file = "py_partiql_parser-0.5.4-py2.py3-none-any.whl", hash = "sha256:3dc4295a47da9587681a96b35c6e151886fdbd0a4acbe0d97c4c68e5f689d315"}, - {file = "py_partiql_parser-0.5.4.tar.gz", hash = "sha256:72e043919538fa63edae72fb59afc7e3fd93adbde656718a7d2b4666f23dd114"}, + {file = "py_partiql_parser-0.5.5-py2.py3-none-any.whl", hash = 
"sha256:90d278818385bd60c602410c953ee78f04ece599d8cd21c656fc5e47399577a1"}, + {file = "py_partiql_parser-0.5.5.tar.gz", hash = "sha256:ed07f8edf4b55e295cab4f5fd3e2ba3196cee48a43fe210d53ddd6ffce1cf1ff"}, ] [package.extras] @@ -2940,47 +2940,47 @@ files = [ [[package]] name = "pyarrow" -version = "16.0.0" +version = "16.1.0" description = "Python library for Apache Arrow" optional = true python-versions = ">=3.8" files = [ - {file = "pyarrow-16.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:22a1fdb1254e5095d629e29cd1ea98ed04b4bbfd8e42cc670a6b639ccc208b60"}, - {file = "pyarrow-16.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:574a00260a4ed9d118a14770edbd440b848fcae5a3024128be9d0274dbcaf858"}, - {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0815d0ddb733b8c1b53a05827a91f1b8bde6240f3b20bf9ba5d650eb9b89cdf"}, - {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df0080339387b5d30de31e0a149c0c11a827a10c82f0c67d9afae3981d1aabb7"}, - {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:edf38cce0bf0dcf726e074159c60516447e4474904c0033f018c1f33d7dac6c5"}, - {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:91d28f9a40f1264eab2af7905a4d95320ac2f287891e9c8b0035f264fe3c3a4b"}, - {file = "pyarrow-16.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:99af421ee451a78884d7faea23816c429e263bd3618b22d38e7992c9ce2a7ad9"}, - {file = "pyarrow-16.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d22d0941e6c7bafddf5f4c0662e46f2075850f1c044bf1a03150dd9e189427ce"}, - {file = "pyarrow-16.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:266ddb7e823f03733c15adc8b5078db2df6980f9aa93d6bb57ece615df4e0ba7"}, - {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cc23090224b6594f5a92d26ad47465af47c1d9c079dd4a0061ae39551889efe"}, - {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56850a0afe9ef37249d5387355449c0f94d12ff7994af88f16803a26d38f2016"}, - {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:705db70d3e2293c2f6f8e84874b5b775f690465798f66e94bb2c07bab0a6bb55"}, - {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:5448564754c154997bc09e95a44b81b9e31ae918a86c0fcb35c4aa4922756f55"}, - {file = "pyarrow-16.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:729f7b262aa620c9df8b9967db96c1575e4cfc8c25d078a06968e527b8d6ec05"}, - {file = "pyarrow-16.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:fb8065dbc0d051bf2ae2453af0484d99a43135cadabacf0af588a3be81fbbb9b"}, - {file = "pyarrow-16.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:20ce707d9aa390593ea93218b19d0eadab56390311cb87aad32c9a869b0e958c"}, - {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5823275c8addbbb50cd4e6a6839952682a33255b447277e37a6f518d6972f4e1"}, - {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ab8b9050752b16a8b53fcd9853bf07d8daf19093533e990085168f40c64d978"}, - {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:42e56557bc7c5c10d3e42c3b32f6cff649a29d637e8f4e8b311d334cc4326730"}, - {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a7abdee4a4a7cfa239e2e8d721224c4b34ffe69a0ca7981354fe03c1328789b"}, - {file = "pyarrow-16.0.0-cp312-cp312-win_amd64.whl", 
hash = "sha256:ef2f309b68396bcc5a354106741d333494d6a0d3e1951271849787109f0229a6"}, - {file = "pyarrow-16.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:ed66e5217b4526fa3585b5e39b0b82f501b88a10d36bd0d2a4d8aa7b5a48e2df"}, - {file = "pyarrow-16.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc8814310486f2a73c661ba8354540f17eef51e1b6dd090b93e3419d3a097b3a"}, - {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c2f5e239db7ed43e0ad2baf46a6465f89c824cc703f38ef0fde927d8e0955f7"}, - {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f293e92d1db251447cb028ae12f7bc47526e4649c3a9924c8376cab4ad6b98bd"}, - {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:dd9334a07b6dc21afe0857aa31842365a62eca664e415a3f9536e3a8bb832c07"}, - {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d91073d1e2fef2c121154680e2ba7e35ecf8d4969cc0af1fa6f14a8675858159"}, - {file = "pyarrow-16.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:71d52561cd7aefd22cf52538f262850b0cc9e4ec50af2aaa601da3a16ef48877"}, - {file = "pyarrow-16.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b93c9a50b965ee0bf4fef65e53b758a7e8dcc0c2d86cebcc037aaaf1b306ecc0"}, - {file = "pyarrow-16.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d831690844706e374c455fba2fb8cfcb7b797bfe53ceda4b54334316e1ac4fa4"}, - {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35692ce8ad0b8c666aa60f83950957096d92f2a9d8d7deda93fb835e6053307e"}, - {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dd3151d098e56f16a8389c1247137f9e4c22720b01c6f3aa6dec29a99b74d80"}, - {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:bd40467bdb3cbaf2044ed7a6f7f251c8f941c8b31275aaaf88e746c4f3ca4a7a"}, - {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:00a1dcb22ad4ceb8af87f7bd30cc3354788776c417f493089e0a0af981bc8d80"}, - {file = "pyarrow-16.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:fda9a7cebd1b1d46c97b511f60f73a5b766a6de4c5236f144f41a5d5afec1f35"}, - {file = "pyarrow-16.0.0.tar.gz", hash = "sha256:59bb1f1edbbf4114c72415f039f1359f1a57d166a331c3229788ccbfbb31689a"}, + {file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"}, + {file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"}, + {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef"}, + {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848"}, + {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c"}, + {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd"}, + {file = "pyarrow-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff"}, + {file = "pyarrow-16.1.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c"}, + {file = 
"pyarrow-16.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c"}, + {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6"}, + {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147"}, + {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e"}, + {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b"}, + {file = "pyarrow-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b"}, + {file = "pyarrow-16.1.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f"}, + {file = "pyarrow-16.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a"}, + {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c"}, + {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2"}, + {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628"}, + {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7"}, + {file = "pyarrow-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444"}, + {file = "pyarrow-16.1.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f"}, + {file = "pyarrow-16.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f"}, + {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2"}, + {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83"}, + {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15"}, + {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed"}, + {file = "pyarrow-16.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710"}, + {file = "pyarrow-16.1.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55"}, + {file = "pyarrow-16.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e"}, + {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4"}, + {file = 
"pyarrow-16.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5"}, + {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa"}, + {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3"}, + {file = "pyarrow-16.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a"}, + {file = "pyarrow-16.1.0.tar.gz", hash = "sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315"}, ] [package.dependencies] @@ -3024,18 +3024,18 @@ files = [ [[package]] name = "pydantic" -version = "2.7.1" +version = "2.7.2" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"}, - {file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"}, + {file = "pydantic-2.7.2-py3-none-any.whl", hash = "sha256:834ab954175f94e6e68258537dc49402c4a5e9d0409b9f1b86b7e934a8372de7"}, + {file = "pydantic-2.7.2.tar.gz", hash = "sha256:71b2945998f9c9b7919a45bde9a50397b289937d215ae141c1d0903ba7149fd7"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.18.2" +pydantic-core = "2.18.3" typing-extensions = ">=4.6.1" [package.extras] @@ -3043,90 +3043,90 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.18.2" +version = "2.18.3" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"}, - {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"}, - {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"}, - {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"}, - {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"}, - {file = 
"pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"}, - {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"}, - {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"}, - {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"}, - {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"}, - {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"}, - {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"}, - {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"}, - {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"}, - {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"}, - {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"}, - {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"}, - {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"}, - {file = 
"pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"}, - {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"}, - {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"}, - {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"}, - {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"}, - {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"}, - {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"}, - {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"}, - {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"}, - {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"}, - {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"}, - {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"}, - {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"}, - {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"}, - {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"}, - {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"}, - {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"}, - {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"}, - {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"}, - {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"}, - {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"}, - {file = 
"pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"}, - {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"}, - {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"}, + {file = "pydantic_core-2.18.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:744697428fcdec6be5670460b578161d1ffe34743a5c15656be7ea82b008197c"}, + {file = "pydantic_core-2.18.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37b40c05ced1ba4218b14986fe6f283d22e1ae2ff4c8e28881a70fb81fbfcda7"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a9a75622357076efb6b311983ff190fbfb3c12fc3a853122b34d3d358126c"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e2e253af04ceaebde8eb201eb3f3e3e7e390f2d275a88300d6a1959d710539e2"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:855ec66589c68aa367d989da5c4755bb74ee92ccad4fdb6af942c3612c067e34"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d3e42bb54e7e9d72c13ce112e02eb1b3b55681ee948d748842171201a03a98a"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6ac9ffccc9d2e69d9fba841441d4259cb668ac180e51b30d3632cd7abca2b9b"}, + {file = "pydantic_core-2.18.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c56eca1686539fa0c9bda992e7bd6a37583f20083c37590413381acfc5f192d6"}, + {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:17954d784bf8abfc0ec2a633108207ebc4fa2df1a0e4c0c3ccbaa9bb01d2c426"}, + {file = "pydantic_core-2.18.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:98ed737567d8f2ecd54f7c8d4f8572ca7c7921ede93a2e52939416170d357812"}, + {file = "pydantic_core-2.18.3-cp310-none-win32.whl", hash = "sha256:9f9e04afebd3ed8c15d67a564ed0a34b54e52136c6d40d14c5547b238390e779"}, + {file = "pydantic_core-2.18.3-cp310-none-win_amd64.whl", hash = "sha256:45e4ffbae34f7ae30d0047697e724e534a7ec0a82ef9994b7913a412c21462a0"}, + {file = "pydantic_core-2.18.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b9ebe8231726c49518b16b237b9fe0d7d361dd221302af511a83d4ada01183ab"}, + {file = "pydantic_core-2.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b8e20e15d18bf7dbb453be78a2d858f946f5cdf06c5072453dace00ab652e2b2"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0d9ff283cd3459fa0bf9b0256a2b6f01ac1ff9ffb034e24457b9035f75587cb"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f7ef5f0ebb77ba24c9970da18b771711edc5feaf00c10b18461e0f5f5949231"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73038d66614d2e5cde30435b5afdced2b473b4c77d4ca3a8624dd3e41a9c19be"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6afd5c867a74c4d314c557b5ea9520183fadfbd1df4c2d6e09fd0d990ce412cd"}, + {file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd7df92f28d351bb9f12470f4c533cf03d1b52ec5a6e5c58c65b183055a60106"}, + 
{file = "pydantic_core-2.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:80aea0ffeb1049336043d07799eace1c9602519fb3192916ff525b0287b2b1e4"}, + {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:aaee40f25bba38132e655ffa3d1998a6d576ba7cf81deff8bfa189fb43fd2bbe"}, + {file = "pydantic_core-2.18.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9128089da8f4fe73f7a91973895ebf2502539d627891a14034e45fb9e707e26d"}, + {file = "pydantic_core-2.18.3-cp311-none-win32.whl", hash = "sha256:fec02527e1e03257aa25b1a4dcbe697b40a22f1229f5d026503e8b7ff6d2eda7"}, + {file = "pydantic_core-2.18.3-cp311-none-win_amd64.whl", hash = "sha256:58ff8631dbab6c7c982e6425da8347108449321f61fe427c52ddfadd66642af7"}, + {file = "pydantic_core-2.18.3-cp311-none-win_arm64.whl", hash = "sha256:3fc1c7f67f34c6c2ef9c213e0f2a351797cda98249d9ca56a70ce4ebcaba45f4"}, + {file = "pydantic_core-2.18.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f0928cde2ae416a2d1ebe6dee324709c6f73e93494d8c7aea92df99aab1fc40f"}, + {file = "pydantic_core-2.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0bee9bb305a562f8b9271855afb6ce00223f545de3d68560b3c1649c7c5295e9"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e862823be114387257dacbfa7d78547165a85d7add33b446ca4f4fae92c7ff5c"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6a36f78674cbddc165abab0df961b5f96b14461d05feec5e1f78da58808b97e7"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba905d184f62e7ddbb7a5a751d8a5c805463511c7b08d1aca4a3e8c11f2e5048"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fdd362f6a586e681ff86550b2379e532fee63c52def1c666887956748eaa326"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24b214b7ee3bd3b865e963dbed0f8bc5375f49449d70e8d407b567af3222aae4"}, + {file = "pydantic_core-2.18.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:691018785779766127f531674fa82bb368df5b36b461622b12e176c18e119022"}, + {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:60e4c625e6f7155d7d0dcac151edf5858102bc61bf959d04469ca6ee4e8381bd"}, + {file = "pydantic_core-2.18.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4e651e47d981c1b701dcc74ab8fec5a60a5b004650416b4abbef13db23bc7be"}, + {file = "pydantic_core-2.18.3-cp312-none-win32.whl", hash = "sha256:ffecbb5edb7f5ffae13599aec33b735e9e4c7676ca1633c60f2c606beb17efc5"}, + {file = "pydantic_core-2.18.3-cp312-none-win_amd64.whl", hash = "sha256:2c8333f6e934733483c7eddffdb094c143b9463d2af7e6bd85ebcb2d4a1b82c6"}, + {file = "pydantic_core-2.18.3-cp312-none-win_arm64.whl", hash = "sha256:7a20dded653e516a4655f4c98e97ccafb13753987434fe7cf044aa25f5b7d417"}, + {file = "pydantic_core-2.18.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:eecf63195be644b0396f972c82598cd15693550f0ff236dcf7ab92e2eb6d3522"}, + {file = "pydantic_core-2.18.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2c44efdd3b6125419c28821590d7ec891c9cb0dff33a7a78d9d5c8b6f66b9702"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e59fca51ffbdd1638b3856779342ed69bcecb8484c1d4b8bdb237d0eb5a45e2"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash 
= "sha256:70cf099197d6b98953468461d753563b28e73cf1eade2ffe069675d2657ed1d5"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63081a49dddc6124754b32a3774331467bfc3d2bd5ff8f10df36a95602560361"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:370059b7883485c9edb9655355ff46d912f4b03b009d929220d9294c7fd9fd60"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a64faeedfd8254f05f5cf6fc755023a7e1606af3959cfc1a9285744cc711044"}, + {file = "pydantic_core-2.18.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19d2e725de0f90d8671f89e420d36c3dd97639b98145e42fcc0e1f6d492a46dc"}, + {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:67bc078025d70ec5aefe6200ef094576c9d86bd36982df1301c758a9fff7d7f4"}, + {file = "pydantic_core-2.18.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:adf952c3f4100e203cbaf8e0c907c835d3e28f9041474e52b651761dc248a3c0"}, + {file = "pydantic_core-2.18.3-cp38-none-win32.whl", hash = "sha256:9a46795b1f3beb167eaee91736d5d17ac3a994bf2215a996aed825a45f897558"}, + {file = "pydantic_core-2.18.3-cp38-none-win_amd64.whl", hash = "sha256:200ad4e3133cb99ed82342a101a5abf3d924722e71cd581cc113fe828f727fbc"}, + {file = "pydantic_core-2.18.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:304378b7bf92206036c8ddd83a2ba7b7d1a5b425acafff637172a3aa72ad7083"}, + {file = "pydantic_core-2.18.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c826870b277143e701c9ccf34ebc33ddb4d072612683a044e7cce2d52f6c3fef"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e201935d282707394f3668380e41ccf25b5794d1b131cdd96b07f615a33ca4b1"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5560dda746c44b48bf82b3d191d74fe8efc5686a9ef18e69bdabccbbb9ad9442"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b32c2a1f8032570842257e4c19288eba9a2bba4712af542327de9a1204faff8"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:929c24e9dea3990bc8bcd27c5f2d3916c0c86f5511d2caa69e0d5290115344a9"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a8376fef60790152564b0eab376b3e23dd6e54f29d84aad46f7b264ecca943"}, + {file = "pydantic_core-2.18.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dccf3ef1400390ddd1fb55bf0632209d39140552d068ee5ac45553b556780e06"}, + {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:41dbdcb0c7252b58fa931fec47937edb422c9cb22528f41cb8963665c372caf6"}, + {file = "pydantic_core-2.18.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:666e45cf071669fde468886654742fa10b0e74cd0fa0430a46ba6056b24fb0af"}, + {file = "pydantic_core-2.18.3-cp39-none-win32.whl", hash = "sha256:f9c08cabff68704a1b4667d33f534d544b8a07b8e5d039c37067fceb18789e78"}, + {file = "pydantic_core-2.18.3-cp39-none-win_amd64.whl", hash = "sha256:4afa5f5973e8572b5c0dcb4e2d4fda7890e7cd63329bd5cc3263a25c92ef0026"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:77319771a026f7c7d29c6ebc623de889e9563b7087911b46fd06c044a12aa5e9"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:df11fa992e9f576473038510d66dd305bcd51d7dd508c163a8c8fe148454e059"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d531076bdfb65af593326ffd567e6ab3da145020dafb9187a1d131064a55f97c"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d33ce258e4e6e6038f2b9e8b8a631d17d017567db43483314993b3ca345dcbbb"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f9cd7f5635b719939019be9bda47ecb56e165e51dd26c9a217a433e3d0d59a9"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cd4a032bb65cc132cae1fe3e52877daecc2097965cd3914e44fbd12b00dae7c5"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f2718430098bcdf60402136c845e4126a189959d103900ebabb6774a5d9fdb"}, + {file = "pydantic_core-2.18.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c0037a92cf0c580ed14e10953cdd26528e8796307bb8bb312dc65f71547df04d"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b95a0972fac2b1ff3c94629fc9081b16371dad870959f1408cc33b2f78ad347a"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a62e437d687cc148381bdd5f51e3e81f5b20a735c55f690c5be94e05da2b0d5c"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b367a73a414bbb08507da102dc2cde0fa7afe57d09b3240ce82a16d608a7679c"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ecce4b2360aa3f008da3327d652e74a0e743908eac306198b47e1c58b03dd2b"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd4435b8d83f0c9561a2a9585b1de78f1abb17cb0cef5f39bf6a4b47d19bafe3"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:616221a6d473c5b9aa83fa8982745441f6a4a62a66436be9445c65f241b86c94"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:7e6382ce89a92bc1d0c0c5edd51e931432202b9080dc921d8d003e616402efd1"}, + {file = "pydantic_core-2.18.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff58f379345603d940e461eae474b6bbb6dab66ed9a851ecd3cb3709bf4dcf6a"}, + {file = "pydantic_core-2.18.3.tar.gz", hash = "sha256:432e999088d85c8f36b9a3f769a8e2b57aabd817bbb729a90d1fe7f18f6f1f39"}, ] [package.dependencies] @@ -3584,13 +3584,13 @@ files = [ [[package]] name = "requests" -version = "2.31.0" +version = "2.32.3" description = "Python HTTP for Humans." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, - {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] [package.dependencies] @@ -3904,60 +3904,60 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.29" +version = "2.0.30" description = "Database Abstraction Library" optional = true python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.29-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c142852ae192e9fe5aad5c350ea6befe9db14370b34047e1f0f7cf99e63c63b"}, - {file = "SQLAlchemy-2.0.29-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:99a1e69d4e26f71e750e9ad6fdc8614fbddb67cfe2173a3628a2566034e223c7"}, - {file = "SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ef3fbccb4058355053c51b82fd3501a6e13dd808c8d8cd2561e610c5456013c"}, - {file = "SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d6753305936eddc8ed190e006b7bb33a8f50b9854823485eed3a886857ab8d1"}, - {file = "SQLAlchemy-2.0.29-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0f3ca96af060a5250a8ad5a63699180bc780c2edf8abf96c58af175921df847a"}, - {file = "SQLAlchemy-2.0.29-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c4520047006b1d3f0d89e0532978c0688219857eb2fee7c48052560ae76aca1e"}, - {file = "SQLAlchemy-2.0.29-cp310-cp310-win32.whl", hash = "sha256:b2a0e3cf0caac2085ff172c3faacd1e00c376e6884b5bc4dd5b6b84623e29e4f"}, - {file = "SQLAlchemy-2.0.29-cp310-cp310-win_amd64.whl", hash = "sha256:01d10638a37460616708062a40c7b55f73e4d35eaa146781c683e0fa7f6c43fb"}, - {file = "SQLAlchemy-2.0.29-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:308ef9cb41d099099fffc9d35781638986870b29f744382904bf9c7dadd08513"}, - {file = "SQLAlchemy-2.0.29-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:296195df68326a48385e7a96e877bc19aa210e485fa381c5246bc0234c36c78e"}, - {file = "SQLAlchemy-2.0.29-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a13b917b4ffe5a0a31b83d051d60477819ddf18276852ea68037a144a506efb9"}, - {file = "SQLAlchemy-2.0.29-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f6d971255d9ddbd3189e2e79d743ff4845c07f0633adfd1de3f63d930dbe673"}, - {file = "SQLAlchemy-2.0.29-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:61405ea2d563407d316c63a7b5271ae5d274a2a9fbcd01b0aa5503635699fa1e"}, - {file = "SQLAlchemy-2.0.29-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:de7202ffe4d4a8c1e3cde1c03e01c1a3772c92858837e8f3879b497158e4cb44"}, - {file = "SQLAlchemy-2.0.29-cp311-cp311-win32.whl", hash = "sha256:b5d7ed79df55a731749ce65ec20d666d82b185fa4898430b17cb90c892741520"}, - {file = "SQLAlchemy-2.0.29-cp311-cp311-win_amd64.whl", hash = "sha256:205f5a2b39d7c380cbc3b5dcc8f2762fb5bcb716838e2d26ccbc54330775b003"}, - {file = "SQLAlchemy-2.0.29-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d96710d834a6fb31e21381c6d7b76ec729bd08c75a25a5184b1089141356171f"}, - {file = "SQLAlchemy-2.0.29-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:52de4736404e53c5c6a91ef2698c01e52333988ebdc218f14c833237a0804f1b"}, - 
{file = "SQLAlchemy-2.0.29-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c7b02525ede2a164c5fa5014915ba3591730f2cc831f5be9ff3b7fd3e30958e"}, - {file = "SQLAlchemy-2.0.29-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dfefdb3e54cd15f5d56fd5ae32f1da2d95d78319c1f6dfb9bcd0eb15d603d5d"}, - {file = "SQLAlchemy-2.0.29-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a88913000da9205b13f6f195f0813b6ffd8a0c0c2bd58d499e00a30eb508870c"}, - {file = "SQLAlchemy-2.0.29-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fecd5089c4be1bcc37c35e9aa678938d2888845a134dd016de457b942cf5a758"}, - {file = "SQLAlchemy-2.0.29-cp312-cp312-win32.whl", hash = "sha256:8197d6f7a3d2b468861ebb4c9f998b9df9e358d6e1cf9c2a01061cb9b6cf4e41"}, - {file = "SQLAlchemy-2.0.29-cp312-cp312-win_amd64.whl", hash = "sha256:9b19836ccca0d321e237560e475fd99c3d8655d03da80c845c4da20dda31b6e1"}, - {file = "SQLAlchemy-2.0.29-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:87a1d53a5382cdbbf4b7619f107cc862c1b0a4feb29000922db72e5a66a5ffc0"}, - {file = "SQLAlchemy-2.0.29-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a0732dffe32333211801b28339d2a0babc1971bc90a983e3035e7b0d6f06b93"}, - {file = "SQLAlchemy-2.0.29-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90453597a753322d6aa770c5935887ab1fc49cc4c4fdd436901308383d698b4b"}, - {file = "SQLAlchemy-2.0.29-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ea311d4ee9a8fa67f139c088ae9f905fcf0277d6cd75c310a21a88bf85e130f5"}, - {file = "SQLAlchemy-2.0.29-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:5f20cb0a63a3e0ec4e169aa8890e32b949c8145983afa13a708bc4b0a1f30e03"}, - {file = "SQLAlchemy-2.0.29-cp37-cp37m-win32.whl", hash = "sha256:e5bbe55e8552019c6463709b39634a5fc55e080d0827e2a3a11e18eb73f5cdbd"}, - {file = "SQLAlchemy-2.0.29-cp37-cp37m-win_amd64.whl", hash = "sha256:c2f9c762a2735600654c654bf48dad388b888f8ce387b095806480e6e4ff6907"}, - {file = "SQLAlchemy-2.0.29-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7e614d7a25a43a9f54fcce4675c12761b248547f3d41b195e8010ca7297c369c"}, - {file = "SQLAlchemy-2.0.29-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:471fcb39c6adf37f820350c28aac4a7df9d3940c6548b624a642852e727ea586"}, - {file = "SQLAlchemy-2.0.29-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:988569c8732f54ad3234cf9c561364221a9e943b78dc7a4aaf35ccc2265f1930"}, - {file = "SQLAlchemy-2.0.29-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dddaae9b81c88083e6437de95c41e86823d150f4ee94bf24e158a4526cbead01"}, - {file = "SQLAlchemy-2.0.29-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:334184d1ab8f4c87f9652b048af3f7abea1c809dfe526fb0435348a6fef3d380"}, - {file = "SQLAlchemy-2.0.29-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:38b624e5cf02a69b113c8047cf7f66b5dfe4a2ca07ff8b8716da4f1b3ae81567"}, - {file = "SQLAlchemy-2.0.29-cp38-cp38-win32.whl", hash = "sha256:bab41acf151cd68bc2b466deae5deeb9e8ae9c50ad113444151ad965d5bf685b"}, - {file = "SQLAlchemy-2.0.29-cp38-cp38-win_amd64.whl", hash = "sha256:52c8011088305476691b8750c60e03b87910a123cfd9ad48576d6414b6ec2a1d"}, - {file = "SQLAlchemy-2.0.29-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3071ad498896907a5ef756206b9dc750f8e57352113c19272bdfdc429c7bd7de"}, - {file = "SQLAlchemy-2.0.29-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dba622396a3170974f81bad49aacebd243455ec3cc70615aeaef9e9613b5bca5"}, - {file = 
"SQLAlchemy-2.0.29-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b184e3de58009cc0bf32e20f137f1ec75a32470f5fede06c58f6c355ed42a72"}, - {file = "SQLAlchemy-2.0.29-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c37f1050feb91f3d6c32f864d8e114ff5545a4a7afe56778d76a9aec62638ba"}, - {file = "SQLAlchemy-2.0.29-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bda7ce59b06d0f09afe22c56714c65c957b1068dee3d5e74d743edec7daba552"}, - {file = "SQLAlchemy-2.0.29-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:25664e18bef6dc45015b08f99c63952a53a0a61f61f2e48a9e70cec27e55f699"}, - {file = "SQLAlchemy-2.0.29-cp39-cp39-win32.whl", hash = "sha256:77d29cb6c34b14af8a484e831ab530c0f7188f8efed1c6a833a2c674bf3c26ec"}, - {file = "SQLAlchemy-2.0.29-cp39-cp39-win_amd64.whl", hash = "sha256:04c487305ab035a9548f573763915189fc0fe0824d9ba28433196f8436f1449c"}, - {file = "SQLAlchemy-2.0.29-py3-none-any.whl", hash = "sha256:dc4ee2d4ee43251905f88637d5281a8d52e916a021384ec10758826f5cbae305"}, - {file = "SQLAlchemy-2.0.29.tar.gz", hash = "sha256:bd9566b8e58cabd700bc367b60e90d9349cd16f0984973f98a9a09f9c64e86f0"}, + {file = "SQLAlchemy-2.0.30-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3b48154678e76445c7ded1896715ce05319f74b1e73cf82d4f8b59b46e9c0ddc"}, + {file = "SQLAlchemy-2.0.30-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2753743c2afd061bb95a61a51bbb6a1a11ac1c44292fad898f10c9839a7f75b2"}, + {file = "SQLAlchemy-2.0.30-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7bfc726d167f425d4c16269a9a10fe8630ff6d14b683d588044dcef2d0f6be7"}, + {file = "SQLAlchemy-2.0.30-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4f61ada6979223013d9ab83a3ed003ded6959eae37d0d685db2c147e9143797"}, + {file = "SQLAlchemy-2.0.30-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3a365eda439b7a00732638f11072907c1bc8e351c7665e7e5da91b169af794af"}, + {file = "SQLAlchemy-2.0.30-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bba002a9447b291548e8d66fd8c96a6a7ed4f2def0bb155f4f0a1309fd2735d5"}, + {file = "SQLAlchemy-2.0.30-cp310-cp310-win32.whl", hash = "sha256:0138c5c16be3600923fa2169532205d18891b28afa817cb49b50e08f62198bb8"}, + {file = "SQLAlchemy-2.0.30-cp310-cp310-win_amd64.whl", hash = "sha256:99650e9f4cf3ad0d409fed3eec4f071fadd032e9a5edc7270cd646a26446feeb"}, + {file = "SQLAlchemy-2.0.30-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:955991a09f0992c68a499791a753523f50f71a6885531568404fa0f231832aa0"}, + {file = "SQLAlchemy-2.0.30-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f69e4c756ee2686767eb80f94c0125c8b0a0b87ede03eacc5c8ae3b54b99dc46"}, + {file = "SQLAlchemy-2.0.30-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69c9db1ce00e59e8dd09d7bae852a9add716efdc070a3e2068377e6ff0d6fdaa"}, + {file = "SQLAlchemy-2.0.30-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1429a4b0f709f19ff3b0cf13675b2b9bfa8a7e79990003207a011c0db880a13"}, + {file = "SQLAlchemy-2.0.30-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:efedba7e13aa9a6c8407c48facfdfa108a5a4128e35f4c68f20c3407e4376aa9"}, + {file = "SQLAlchemy-2.0.30-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:16863e2b132b761891d6c49f0a0f70030e0bcac4fd208117f6b7e053e68668d0"}, + {file = "SQLAlchemy-2.0.30-cp311-cp311-win32.whl", hash = "sha256:2ecabd9ccaa6e914e3dbb2aa46b76dede7eadc8cbf1b8083c94d936bcd5ffb49"}, + {file = "SQLAlchemy-2.0.30-cp311-cp311-win_amd64.whl", hash = 
"sha256:0b3f4c438e37d22b83e640f825ef0f37b95db9aa2d68203f2c9549375d0b2260"}, + {file = "SQLAlchemy-2.0.30-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5a79d65395ac5e6b0c2890935bad892eabb911c4aa8e8015067ddb37eea3d56c"}, + {file = "SQLAlchemy-2.0.30-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9a5baf9267b752390252889f0c802ea13b52dfee5e369527da229189b8bd592e"}, + {file = "SQLAlchemy-2.0.30-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cb5a646930c5123f8461f6468901573f334c2c63c795b9af350063a736d0134"}, + {file = "SQLAlchemy-2.0.30-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:296230899df0b77dec4eb799bcea6fbe39a43707ce7bb166519c97b583cfcab3"}, + {file = "SQLAlchemy-2.0.30-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c62d401223f468eb4da32627bffc0c78ed516b03bb8a34a58be54d618b74d472"}, + {file = "SQLAlchemy-2.0.30-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3b69e934f0f2b677ec111b4d83f92dc1a3210a779f69bf905273192cf4ed433e"}, + {file = "SQLAlchemy-2.0.30-cp312-cp312-win32.whl", hash = "sha256:77d2edb1f54aff37e3318f611637171e8ec71472f1fdc7348b41dcb226f93d90"}, + {file = "SQLAlchemy-2.0.30-cp312-cp312-win_amd64.whl", hash = "sha256:b6c7ec2b1f4969fc19b65b7059ed00497e25f54069407a8701091beb69e591a5"}, + {file = "SQLAlchemy-2.0.30-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5a8e3b0a7e09e94be7510d1661339d6b52daf202ed2f5b1f9f48ea34ee6f2d57"}, + {file = "SQLAlchemy-2.0.30-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b60203c63e8f984df92035610c5fb76d941254cf5d19751faab7d33b21e5ddc0"}, + {file = "SQLAlchemy-2.0.30-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1dc3eabd8c0232ee8387fbe03e0a62220a6f089e278b1f0aaf5e2d6210741ad"}, + {file = "SQLAlchemy-2.0.30-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:40ad017c672c00b9b663fcfcd5f0864a0a97828e2ee7ab0c140dc84058d194cf"}, + {file = "SQLAlchemy-2.0.30-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e42203d8d20dc704604862977b1470a122e4892791fe3ed165f041e4bf447a1b"}, + {file = "SQLAlchemy-2.0.30-cp37-cp37m-win32.whl", hash = "sha256:2a4f4da89c74435f2bc61878cd08f3646b699e7d2eba97144030d1be44e27584"}, + {file = "SQLAlchemy-2.0.30-cp37-cp37m-win_amd64.whl", hash = "sha256:b6bf767d14b77f6a18b6982cbbf29d71bede087edae495d11ab358280f304d8e"}, + {file = "SQLAlchemy-2.0.30-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bc0c53579650a891f9b83fa3cecd4e00218e071d0ba00c4890f5be0c34887ed3"}, + {file = "SQLAlchemy-2.0.30-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:311710f9a2ee235f1403537b10c7687214bb1f2b9ebb52702c5aa4a77f0b3af7"}, + {file = "SQLAlchemy-2.0.30-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:408f8b0e2c04677e9c93f40eef3ab22f550fecb3011b187f66a096395ff3d9fd"}, + {file = "SQLAlchemy-2.0.30-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37a4b4fb0dd4d2669070fb05b8b8824afd0af57587393015baee1cf9890242d9"}, + {file = "SQLAlchemy-2.0.30-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a943d297126c9230719c27fcbbeab57ecd5d15b0bd6bfd26e91bfcfe64220621"}, + {file = "SQLAlchemy-2.0.30-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0a089e218654e740a41388893e090d2e2c22c29028c9d1353feb38638820bbeb"}, + {file = "SQLAlchemy-2.0.30-cp38-cp38-win32.whl", hash = "sha256:fa561138a64f949f3e889eb9ab8c58e1504ab351d6cf55259dc4c248eaa19da6"}, + {file = "SQLAlchemy-2.0.30-cp38-cp38-win_amd64.whl", hash = 
"sha256:7d74336c65705b986d12a7e337ba27ab2b9d819993851b140efdf029248e818e"}, + {file = "SQLAlchemy-2.0.30-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ae8c62fe2480dd61c532ccafdbce9b29dacc126fe8be0d9a927ca3e699b9491a"}, + {file = "SQLAlchemy-2.0.30-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2383146973a15435e4717f94c7509982770e3e54974c71f76500a0136f22810b"}, + {file = "SQLAlchemy-2.0.30-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8409de825f2c3b62ab15788635ccaec0c881c3f12a8af2b12ae4910a0a9aeef6"}, + {file = "SQLAlchemy-2.0.30-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0094c5dc698a5f78d3d1539853e8ecec02516b62b8223c970c86d44e7a80f6c7"}, + {file = "SQLAlchemy-2.0.30-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:edc16a50f5e1b7a06a2dcc1f2205b0b961074c123ed17ebda726f376a5ab0953"}, + {file = "SQLAlchemy-2.0.30-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f7703c2010355dd28f53deb644a05fc30f796bd8598b43f0ba678878780b6e4c"}, + {file = "SQLAlchemy-2.0.30-cp39-cp39-win32.whl", hash = "sha256:1f9a727312ff6ad5248a4367358e2cf7e625e98b1028b1d7ab7b806b7d757513"}, + {file = "SQLAlchemy-2.0.30-cp39-cp39-win_amd64.whl", hash = "sha256:a0ef36b28534f2a5771191be6edb44cc2673c7b2edf6deac6562400288664221"}, + {file = "SQLAlchemy-2.0.30-py3-none-any.whl", hash = "sha256:7108d569d3990c71e26a42f60474b4c02c8586c4681af5fd67e51a044fdea86a"}, + {file = "SQLAlchemy-2.0.30.tar.gz", hash = "sha256:2b1708916730f4830bc69d6f49d37f7698b5bd7530aca7f04f785f8849e95255"}, ] [package.dependencies] @@ -4019,17 +4019,18 @@ mpmath = ">=0.19" [[package]] name = "tenacity" -version = "8.2.3" +version = "8.3.0" description = "Retry code until it succeeds" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "tenacity-8.2.3-py3-none-any.whl", hash = "sha256:ce510e327a630c9e1beaf17d42e6ffacc88185044ad85cf74c0a8887c6a0f88c"}, - {file = "tenacity-8.2.3.tar.gz", hash = "sha256:5398ef0d78e63f40007c1fb4c0bff96e1911394d2fa8d194f77619c05ff6cc8a"}, + {file = "tenacity-8.3.0-py3-none-any.whl", hash = "sha256:3649f6443dbc0d9b01b9d8020a9c4ec7a1ff5f6f3c6c8a036ef371f573fe9185"}, + {file = "tenacity-8.3.0.tar.gz", hash = "sha256:953d4e6ad24357bceffbc9707bc74349aca9d245f68eb65419cf0c249a1949a2"}, ] [package.extras] -doc = ["reno", "sphinx", "tornado (>=4.5)"] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] [[package]] name = "thrift" @@ -4062,13 +4063,13 @@ files = [ [[package]] name = "tqdm" -version = "4.66.2" +version = "4.66.3" description = "Fast, Extensible Progress Meter" optional = true python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, - {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, + {file = "tqdm-4.66.3-py3-none-any.whl", hash = "sha256:4f41d54107ff9a223dca80b53efe4fb654c67efaba7f47bada3ee9d50e05bd53"}, + {file = "tqdm-4.66.3.tar.gz", hash = "sha256:23097a41eba115ba99ecae40d06444c15d1c0c698d527a01c6c8bd1c5d0647e5"}, ] [package.dependencies] @@ -4082,13 +4083,13 @@ telegram = ["requests"] [[package]] name = "typing-extensions" -version = "4.11.0" +version = "4.12.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, - 
{file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, + {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, + {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, ] [[package]] @@ -4157,13 +4158,13 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [[package]] name = "werkzeug" -version = "3.0.1" +version = "3.0.3" description = "The comprehensive WSGI web application library." optional = false python-versions = ">=3.8" files = [ - {file = "werkzeug-3.0.1-py3-none-any.whl", hash = "sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10"}, - {file = "werkzeug-3.0.1.tar.gz", hash = "sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc"}, + {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"}, + {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"}, ] [package.dependencies] @@ -4461,4 +4462,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "91de7f775ff1499d79db490197eee5aadc7078b5244d86e56d8626c2615645f6" +content-hash = "8024e9ca0aa700346e902b232337c8bad69e5cd6e482db4999446f6177e7646d" diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index 18d803fe1c..9a951b5c8e 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -36,7 +36,13 @@ cast, ) -from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchTableError, NotInstalledError, TableAlreadyExistsError +from pyiceberg.exceptions import ( + NamespaceAlreadyExistsError, + NoSuchNamespaceError, + NoSuchTableError, + NotInstalledError, + TableAlreadyExistsError, +) from pyiceberg.io import FileIO, load_file_io from pyiceberg.manifest import ManifestFile from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec @@ -477,6 +483,18 @@ def create_namespace(self, namespace: Union[str, Identifier], properties: Proper NamespaceAlreadyExistsError: If a namespace with the given name already exists. """ + def create_namespace_if_not_exists(self, namespace: Union[str, Identifier], properties: Properties = EMPTY_DICT) -> None: + """Create a namespace if it does not exist. + + Args: + namespace (str | Identifier): Namespace identifier. + properties (Properties): A string dictionary of properties for the given namespace. + """ + try: + self.create_namespace(namespace, properties) + except NamespaceAlreadyExistsError: + pass + @abstractmethod def drop_namespace(self, namespace: Union[str, Identifier]) -> None: """Drop a namespace. @@ -570,7 +588,7 @@ def identifier_to_tuple(identifier: Union[str, Identifier]) -> Identifier: If the identifier is a string, it is split into a tuple on '.'. If it is a tuple, it is used as-is. Args: - identifier (str | Identifier: an identifier, either a string or tuple of strings. + identifier (str | Identifier): an identifier, either a string or tuple of strings. Returns: Identifier: a tuple of strings. 
@@ -601,6 +619,29 @@ def namespace_from(identifier: Union[str, Identifier]) -> Identifier: """ return Catalog.identifier_to_tuple(identifier)[:-1] + @staticmethod + def namespace_to_string( + identifier: Union[str, Identifier], err: Union[Type[ValueError], Type[NoSuchNamespaceError]] = ValueError + ) -> str: + """Transform a namespace identifier into a string. + + Args: + identifier (Union[str, Identifier]): a namespace identifier. + err (Union[Type[ValueError], Type[NoSuchNamespaceError]]): the error type to raise when identifier is empty. + + Returns: + Identifier: Namespace identifier. + """ + tuple_identifier = Catalog.identifier_to_tuple(identifier) + if len(tuple_identifier) < 1: + raise err("Empty namespace identifier") + + # Check if any segment of the tuple is an empty string + if any(segment.strip() == "" for segment in tuple_identifier): + raise err("Namespace identifier contains an empty segment or a segment with only whitespace") + + return ".".join(segment.strip() for segment in tuple_identifier) + @staticmethod def identifier_to_database( identifier: Union[str, Identifier], err: Union[Type[ValueError], Type[NoSuchNamespaceError]] = ValueError @@ -720,7 +761,7 @@ def _create_staged_table( metadata = new_table_metadata( location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties ) - io = load_file_io(properties=self.properties, location=metadata_location) + io = self._load_file_io(properties=properties, location=metadata_location) return StagedTable( identifier=(self.name, database_name, table_name), metadata=metadata, @@ -779,7 +820,7 @@ def _get_updated_props_and_update_summary( def _resolve_table_location(self, location: Optional[str], database_name: str, table_name: str) -> str: if not location: return self._get_default_warehouse_location(database_name, table_name) - return location + return location.rstrip("/") def _get_default_warehouse_location(self, database_name: str, table_name: str) -> str: database_properties = self.load_namespace_properties(database_name) diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 275cda7ed0..8819c2e266 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -417,7 +417,14 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: Raises: TableAlreadyExistsError: If the table already exists """ - raise NotImplementedError + database_name, table_name = self.identifier_to_database_and_table(identifier) + properties = EMPTY_DICT + io = self._load_file_io(location=metadata_location) + file = io.new_input(metadata_location) + metadata = FromInputFile.table_metadata(file) + table_input = _construct_table_input(table_name, metadata_location, properties, metadata) + self._create_glue_table(database_name=database_name, table_name=table_name, table_input=table_input) + return self.load_table(identifier=identifier) def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: """Update the table. diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 804b1105cc..83bbd50779 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import getpass +import logging import socket import time from types import TracebackType @@ -33,6 +34,7 @@ from hive_metastore.ThriftHiveMetastore import Client from hive_metastore.ttypes import ( AlreadyExistsException, + CheckLockRequest, FieldSchema, InvalidOperationException, LockComponent, @@ -49,6 +51,7 @@ ) from hive_metastore.ttypes import Database as HiveDatabase from hive_metastore.ttypes import Table as HiveTable +from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential from thrift.protocol import TBinaryProtocol from thrift.transport import TSocket, TTransport @@ -67,15 +70,22 @@ NamespaceNotEmptyError, NoSuchIcebergTableError, NoSuchNamespaceError, + NoSuchPropertyException, NoSuchTableError, TableAlreadyExistsError, + WaitingForLockException, ) -from pyiceberg.io import FileIO, load_file_io from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema, SchemaVisitor, visit from pyiceberg.serializers import FromInputFile -from pyiceberg.table import CommitTableRequest, CommitTableResponse, PropertyUtil, Table, TableProperties, update_table_metadata -from pyiceberg.table.metadata import new_table_metadata +from pyiceberg.table import ( + CommitTableRequest, + CommitTableResponse, + PropertyUtil, + StagedTable, + Table, + TableProperties, +) from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties from pyiceberg.types import ( @@ -111,6 +121,15 @@ HIVE2_COMPATIBLE = "hive.hive2-compatible" HIVE2_COMPATIBLE_DEFAULT = False +LOCK_CHECK_MIN_WAIT_TIME = "lock-check-min-wait-time" +LOCK_CHECK_MAX_WAIT_TIME = "lock-check-max-wait-time" +LOCK_CHECK_RETRIES = "lock-check-retries" +DEFAULT_LOCK_CHECK_MIN_WAIT_TIME = 0.1 # 100 milliseconds +DEFAULT_LOCK_CHECK_MAX_WAIT_TIME = 60 # 1 min +DEFAULT_LOCK_CHECK_RETRIES = 4 + +logger = logging.getLogger(__name__) + class _HiveClient: """Helper class to nicely open and close the transport.""" @@ -126,7 +145,7 @@ def __init__(self, uri: str, ugi: Optional[str] = None): protocol = TBinaryProtocol.TBinaryProtocol(transport) self._client = Client(protocol) - self._ugi = ugi.split(':') if ugi else None + self._ugi = ugi.split(":") if ugi else None def __enter__(self) -> Client: self._transport.open() @@ -240,10 +259,24 @@ def __init__(self, name: str, **properties: str): super().__init__(name, **properties) self._client = _HiveClient(properties["uri"], properties.get("ugi")) - def _convert_hive_into_iceberg(self, table: HiveTable, io: FileIO) -> Table: + self._lock_check_min_wait_time = PropertyUtil.property_as_float( + properties, LOCK_CHECK_MIN_WAIT_TIME, DEFAULT_LOCK_CHECK_MIN_WAIT_TIME + ) + self._lock_check_max_wait_time = PropertyUtil.property_as_float( + properties, LOCK_CHECK_MAX_WAIT_TIME, DEFAULT_LOCK_CHECK_MAX_WAIT_TIME + ) + self._lock_check_retries = PropertyUtil.property_as_float( + properties, + LOCK_CHECK_RETRIES, + DEFAULT_LOCK_CHECK_RETRIES, + ) + + def _convert_hive_into_iceberg(self, table: HiveTable) -> Table: properties: Dict[str, str] = table.parameters if TABLE_TYPE not in properties: - raise NoSuchTableError(f"Property table_type missing, could not determine type: {table.dbName}.{table.tableName}") + raise NoSuchPropertyException( + f"Property table_type missing, could not determine type: {table.dbName}.{table.tableName}" + ) table_type = properties[TABLE_TYPE] if table_type.lower() != ICEBERG: @@ -254,8 +287,9 @@ def _convert_hive_into_iceberg(self, table: HiveTable, 
io: FileIO) -> Table: if prop_metadata_location := properties.get(METADATA_LOCATION): metadata_location = prop_metadata_location else: - raise NoSuchTableError(f"Table property {METADATA_LOCATION} is missing") + raise NoSuchPropertyException(f"Table property {METADATA_LOCATION} is missing") + io = self._load_file_io(location=metadata_location) file = io.new_input(metadata_location) metadata = FromInputFile.table_metadata(file) return Table( @@ -266,6 +300,38 @@ def _convert_hive_into_iceberg(self, table: HiveTable, io: FileIO) -> Table: catalog=self, ) + def _convert_iceberg_into_hive(self, table: Table) -> HiveTable: + identifier_tuple = self.identifier_to_tuple_without_catalog(table.identifier) + database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) + current_time_millis = int(time.time() * 1000) + + return HiveTable( + dbName=database_name, + tableName=table_name, + owner=table.properties[OWNER] if table.properties and OWNER in table.properties else getpass.getuser(), + createTime=current_time_millis // 1000, + lastAccessTime=current_time_millis // 1000, + sd=_construct_hive_storage_descriptor( + table.schema(), + table.location(), + PropertyUtil.property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT), + ), + tableType=EXTERNAL_TABLE, + parameters=_construct_parameters(table.metadata_location), + ) + + def _create_hive_table(self, open_client: Client, hive_table: HiveTable) -> None: + try: + open_client.create_table(hive_table) + except AlreadyExistsException as e: + raise TableAlreadyExistsError(f"Table {hive_table.dbName}.{hive_table.tableName} already exists") from e + + def _get_hive_table(self, open_client: Client, database_name: str, table_name: str) -> HiveTable: + try: + return open_client.get_table(dbname=database_name, tbl_name=table_name) + except NoSuchObjectException as e: + raise NoSuchTableError(f"Table does not exists: {table_name}") from e + def create_table( self, identifier: Union[str, Identifier], @@ -292,45 +358,25 @@ def create_table( AlreadyExistsError: If a table with the name already exists. ValueError: If the identifier is invalid. 
""" - schema: Schema = self._convert_schema_if_needed(schema) # type: ignore - properties = {**DEFAULT_PROPERTIES, **properties} - database_name, table_name = self.identifier_to_database_and_table(identifier) - current_time_millis = int(time.time() * 1000) - - location = self._resolve_table_location(location, database_name, table_name) - - metadata_location = self._get_metadata_location(location=location) - metadata = new_table_metadata( - location=location, + staged_table = self._create_staged_table( + identifier=identifier, schema=schema, + location=location, partition_spec=partition_spec, sort_order=sort_order, properties=properties, ) - io = load_file_io({**self.properties, **properties}, location=location) - self._write_metadata(metadata, io, metadata_location) + database_name, table_name = self.identifier_to_database_and_table(identifier) - tbl = HiveTable( - dbName=database_name, - tableName=table_name, - owner=properties[OWNER] if properties and OWNER in properties else getpass.getuser(), - createTime=current_time_millis // 1000, - lastAccessTime=current_time_millis // 1000, - sd=_construct_hive_storage_descriptor( - schema, location, PropertyUtil.property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT) - ), - tableType=EXTERNAL_TABLE, - parameters=_construct_parameters(metadata_location), - ) - try: - with self._client as open_client: - open_client.create_table(tbl) - hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name) - except AlreadyExistsException as e: - raise TableAlreadyExistsError(f"Table {database_name}.{table_name} already exists") from e + self._write_metadata(staged_table.metadata, staged_table.io, staged_table.metadata_location) + tbl = self._convert_iceberg_into_hive(staged_table) + + with self._client as open_client: + self._create_hive_table(open_client, tbl) + hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name) - return self._convert_hive_into_iceberg(hive_table, io) + return self._convert_hive_into_iceberg(hive_table) def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: """Register a new table using existing metadata. @@ -356,6 +402,26 @@ def _create_lock_request(self, database_name: str, table_name: str) -> LockReque return lock_request + def _wait_for_lock(self, database_name: str, table_name: str, lockid: int, open_client: Client) -> LockResponse: + @retry( + retry=retry_if_exception_type(WaitingForLockException), + wait=wait_exponential(multiplier=2, min=self._lock_check_min_wait_time, max=self._lock_check_max_wait_time), + stop=stop_after_attempt(self._lock_check_retries), + reraise=True, + ) + def _do_wait_for_lock() -> LockResponse: + response: LockResponse = open_client.check_lock(CheckLockRequest(lockid=lockid)) + if response.state == LockState.ACQUIRED: + return response + elif response.state == LockState.WAITING: + msg = f"Wait on lock for {database_name}.{table_name}" + logger.warning(msg) + raise WaitingForLockException(msg) + else: + raise CommitFailedException(f"Failed to check lock for {database_name}.{table_name}, state: {response.state}") + + return _do_wait_for_lock() + def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: """Update the table. 
@@ -380,36 +446,57 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons try: if lock.state != LockState.ACQUIRED: - raise CommitFailedException(f"Failed to acquire lock for {table_request.identifier}, state: {lock.state}") - - hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name) - io = load_file_io({**self.properties, **hive_table.parameters}, hive_table.sd.location) - current_table = self._convert_hive_into_iceberg(hive_table, io) - - base_metadata = current_table.metadata - for requirement in table_request.requirements: - requirement.validate(base_metadata) - - updated_metadata = update_table_metadata(base_metadata, table_request.updates) - if updated_metadata == base_metadata: + if lock.state == LockState.WAITING: + self._wait_for_lock(database_name, table_name, lock.lockid, open_client) + else: + raise CommitFailedException(f"Failed to acquire lock for {table_request.identifier}, state: {lock.state}") + + hive_table: Optional[HiveTable] + current_table: Optional[Table] + try: + hive_table = self._get_hive_table(open_client, database_name, table_name) + current_table = self._convert_hive_into_iceberg(hive_table) + except NoSuchTableError: + hive_table = None + current_table = None + + updated_staged_table = self._update_and_stage_table(current_table, table_request) + if current_table and updated_staged_table.metadata == current_table.metadata: # no changes, do nothing - return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location) - - # write new metadata - new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1 - new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version) - self._write_metadata(updated_metadata, current_table.io, new_metadata_location) - - hive_table.parameters = _construct_parameters( - metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location + return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location) + self._write_metadata( + metadata=updated_staged_table.metadata, + io=updated_staged_table.io, + metadata_path=updated_staged_table.metadata_location, ) - open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=hive_table) - except NoSuchObjectException as e: - raise NoSuchTableError(f"Table does not exist: {table_name}") from e + + if hive_table and current_table: + # Table exists, update it. + hive_table.parameters = _construct_parameters( + metadata_location=updated_staged_table.metadata_location, + previous_metadata_location=current_table.metadata_location, + ) + open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=hive_table) + else: + # Table does not exist, create it. 
+ hive_table = self._convert_iceberg_into_hive( + StagedTable( + identifier=(self.name, database_name, table_name), + metadata=updated_staged_table.metadata, + metadata_location=updated_staged_table.metadata_location, + io=updated_staged_table.io, + catalog=self, + ) + ) + self._create_hive_table(open_client, hive_table) + except WaitingForLockException as e: + raise CommitFailedException(f"Failed to acquire lock for {table_request.identifier}, state: {lock.state}") from e finally: open_client.unlock(UnlockRequest(lockid=lock.lockid)) - return CommitTableResponse(metadata=updated_metadata, metadata_location=new_metadata_location) + return CommitTableResponse( + metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location + ) def load_table(self, identifier: Union[str, Identifier]) -> Table: """Load the table's metadata and return the table instance. @@ -428,14 +515,11 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: """ identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) - try: - with self._client as open_client: - hive_table = open_client.get_table(dbname=database_name, tbl_name=table_name) - except NoSuchObjectException as e: - raise NoSuchTableError(f"Table does not exists: {table_name}") from e - io = load_file_io({**self.properties, **hive_table.parameters}, hive_table.sd.location) - return self._convert_hive_into_iceberg(hive_table, io) + with self._client as open_client: + hive_table = self._get_hive_table(open_client, database_name, table_name) + + return self._convert_hive_into_iceberg(hive_table) def drop_table(self, identifier: Union[str, Identifier]) -> None: """Drop a table. 
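Taken together, the Hive changes above make the lock-wait behaviour tunable per catalog. A configuration sketch follows; the thrift URI and the values are placeholders, while the property keys and defaults come from the constants introduced above:

```python
from pyiceberg.catalog.hive import HiveCatalog

catalog = HiveCatalog(
    "hive",
    **{
        "uri": "thrift://localhost:9083",   # placeholder metastore URI
        "lock-check-min-wait-time": "0.1",  # seconds; default 0.1
        "lock-check-max-wait-time": "60",   # seconds; default 60
        "lock-check-retries": "4",          # default 4
    },
)
```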
diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py index 53e3f6a123..2474b89853 100644 --- a/pyiceberg/catalog/rest.py +++ b/pyiceberg/catalog/rest.py @@ -152,7 +152,7 @@ class CreateTableRequest(IcebergBaseModel): properties: Dict[str, str] = Field(default_factory=dict) # validators - @field_validator('properties', mode='before') + @field_validator("properties", mode="before") def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: return transform_dict_value_to_str(properties) @@ -519,6 +519,8 @@ def _create_table( fresh_sort_order = assign_fresh_sort_order_ids(sort_order, iceberg_schema, fresh_schema) namespace_and_table = self._split_identifier_for_path(identifier) + if location: + location = location.rstrip("/") request = CreateTableRequest( name=namespace_and_table["table"], location=location, @@ -713,7 +715,7 @@ def create_namespace(self, namespace: Union[str, Identifier], properties: Proper try: response.raise_for_status() except HTTPError as exc: - self._handle_non_200_response(exc, {404: NoSuchNamespaceError, 409: NamespaceAlreadyExistsError}) + self._handle_non_200_response(exc, {409: NamespaceAlreadyExistsError}) @retry(**_RETRY_ARGS) def drop_namespace(self, namespace: Union[str, Identifier]) -> None: @@ -788,4 +790,4 @@ def table_exists(self, identifier: Union[str, Identifier]) -> bool: response = self._session.head( self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier_tuple)) ) - return response.status_code == 200 + return response.status_code in (200, 204) diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index 978109b2a3..ff7831d77f 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -43,6 +43,7 @@ from pyiceberg.catalog import ( METADATA_LOCATION, + Catalog, MetastoreCatalog, PropertiesUpdateSummary, ) @@ -59,7 +60,7 @@ from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.serializers import FromInputFile -from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table, update_table_metadata +from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table from pyiceberg.table.metadata import new_table_metadata from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties @@ -94,6 +95,16 @@ class IcebergNamespaceProperties(SqlCatalogBaseTable): class SqlCatalog(MetastoreCatalog): + """Implementation of a SQL based catalog. + + In the `JDBCCatalog` implementation, a `Namespace` is composed of a list of strings separated by dots: `'ns1.ns2.ns3'`. + And you can have as many levels as you want, but you need at least one. The `SqlCatalog` honors the same convention. + + In the `JDBCCatalog` implementation, a `TableIdentifier` is composed of an optional `Namespace` and a table name. + When a `Namespace` is present, the full name will be `'ns1.ns2.ns3.table'`. A valid `TableIdentifier` could be `'name'` (no namespace). + The `SqlCatalog` has a different convention where a `TableIdentifier` requires a `Namespace`. 
+ """ + def __init__(self, name: str, **properties: str): super().__init__(name, **properties) @@ -136,7 +147,7 @@ def _convert_orm_to_iceberg(self, orm_table: IcebergTables) -> Table: file = io.new_input(metadata_location) metadata = FromInputFile.table_metadata(file) return Table( - identifier=(self.name, table_namespace, table_name), + identifier=(self.name,) + Catalog.identifier_to_tuple(table_namespace) + (table_name,), metadata=metadata, metadata_location=metadata_location, io=self._load_file_io(metadata.properties, metadata_location), @@ -173,11 +184,14 @@ def create_table( """ schema: Schema = self._convert_schema_if_needed(schema) # type: ignore - database_name, table_name = self.identifier_to_database_and_table(identifier) - if not self._namespace_exists(database_name): - raise NoSuchNamespaceError(f"Namespace does not exist: {database_name}") + identifier_nocatalog = self.identifier_to_tuple_without_catalog(identifier) + namespace_identifier = Catalog.namespace_from(identifier_nocatalog) + table_name = Catalog.table_name_from(identifier_nocatalog) + if not self._namespace_exists(namespace_identifier): + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace_identifier}") - location = self._resolve_table_location(location, database_name, table_name) + namespace = Catalog.namespace_to_string(namespace_identifier) + location = self._resolve_table_location(location, namespace, table_name) metadata_location = self._get_metadata_location(location=location) metadata = new_table_metadata( location=location, schema=schema, partition_spec=partition_spec, sort_order=sort_order, properties=properties @@ -190,7 +204,7 @@ def create_table( session.add( IcebergTables( catalog_name=self.name, - table_namespace=database_name, + table_namespace=namespace, table_name=table_name, metadata_location=metadata_location, previous_metadata_location=None, @@ -198,7 +212,7 @@ def create_table( ) session.commit() except IntegrityError as e: - raise TableAlreadyExistsError(f"Table {database_name}.{table_name} already exists") from e + raise TableAlreadyExistsError(f"Table {namespace}.{table_name} already exists") from e return self.load_table(identifier=identifier) @@ -216,16 +230,19 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: TableAlreadyExistsError: If the table already exists NoSuchNamespaceError: If namespace does not exist """ - database_name, table_name = self.identifier_to_database_and_table(identifier) - if not self._namespace_exists(database_name): - raise NoSuchNamespaceError(f"Namespace does not exist: {database_name}") + identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) + namespace_tuple = Catalog.namespace_from(identifier_tuple) + namespace = Catalog.namespace_to_string(namespace_tuple) + table_name = Catalog.table_name_from(identifier_tuple) + if not self._namespace_exists(namespace): + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") with Session(self.engine) as session: try: session.add( IcebergTables( catalog_name=self.name, - table_namespace=database_name, + table_namespace=namespace, table_name=table_name, metadata_location=metadata_location, previous_metadata_location=None, @@ -233,7 +250,7 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location: ) session.commit() except IntegrityError as e: - raise TableAlreadyExistsError(f"Table {database_name}.{table_name} already exists") from e + raise TableAlreadyExistsError(f"Table {namespace}.{table_name} already exists") from e 
return self.load_table(identifier=identifier) @@ -253,17 +270,19 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: NoSuchTableError: If a table with the name does not exist. """ identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) - database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) + namespace_tuple = Catalog.namespace_from(identifier_tuple) + namespace = Catalog.namespace_to_string(namespace_tuple) + table_name = Catalog.table_name_from(identifier_tuple) with Session(self.engine) as session: stmt = select(IcebergTables).where( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, + IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, ) result = session.scalar(stmt) if result: return self._convert_orm_to_iceberg(result) - raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") + raise NoSuchTableError(f"Table does not exist: {namespace}.{table_name}") def drop_table(self, identifier: Union[str, Identifier]) -> None: """Drop a table. @@ -275,18 +294,20 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: NoSuchTableError: If a table with the name does not exist. """ identifier_tuple = self.identifier_to_tuple_without_catalog(identifier) - database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) + namespace_tuple = Catalog.namespace_from(identifier_tuple) + namespace = Catalog.namespace_to_string(namespace_tuple) + table_name = Catalog.table_name_from(identifier_tuple) with Session(self.engine) as session: if self.engine.dialect.supports_sane_rowcount: res = session.execute( delete(IcebergTables).where( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, + IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, ) ) if res.rowcount < 1: - raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") + raise NoSuchTableError(f"Table does not exist: {namespace}.{table_name}") else: try: tbl = ( @@ -294,14 +315,14 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None: .with_for_update(of=IcebergTables) .filter( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, + IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, ) .one() ) session.delete(tbl) except NoResultFound as e: - raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") from e + raise NoSuchTableError(f"Table does not exist: {namespace}.{table_name}") from e session.commit() def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table: @@ -320,10 +341,15 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U NoSuchNamespaceError: If the target namespace does not exist. 
""" from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier) - from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier_tuple, NoSuchTableError) - to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier) - if not self._namespace_exists(to_database_name): - raise NoSuchNamespaceError(f"Namespace does not exist: {to_database_name}") + to_identifier_tuple = self.identifier_to_tuple_without_catalog(to_identifier) + from_namespace_tuple = Catalog.namespace_from(from_identifier_tuple) + from_namespace = Catalog.namespace_to_string(from_namespace_tuple) + from_table_name = Catalog.table_name_from(from_identifier_tuple) + to_namespace_tuple = Catalog.namespace_from(to_identifier_tuple) + to_namespace = Catalog.namespace_to_string(to_namespace_tuple) + to_table_name = Catalog.table_name_from(to_identifier_tuple) + if not self._namespace_exists(to_namespace): + raise NoSuchNamespaceError(f"Namespace does not exist: {to_namespace}") with Session(self.engine) as session: try: if self.engine.dialect.supports_sane_rowcount: @@ -331,10 +357,10 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U update(IcebergTables) .where( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == from_database_name, + IcebergTables.table_namespace == from_namespace, IcebergTables.table_name == from_table_name, ) - .values(table_namespace=to_database_name, table_name=to_table_name) + .values(table_namespace=to_namespace, table_name=to_table_name) ) result = session.execute(stmt) if result.rowcount < 1: @@ -346,18 +372,18 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U .with_for_update(of=IcebergTables) .filter( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == from_database_name, + IcebergTables.table_namespace == from_namespace, IcebergTables.table_name == from_table_name, ) .one() ) - tbl.table_namespace = to_database_name + tbl.table_namespace = to_namespace tbl.table_name = to_table_name except NoResultFound as e: raise NoSuchTableError(f"Table does not exist: {from_table_name}") from e session.commit() except IntegrityError as e: - raise TableAlreadyExistsError(f"Table {to_database_name}.{to_table_name} already exists") from e + raise TableAlreadyExistsError(f"Table {to_namespace}.{to_table_name} already exists") from e return self.load_table(to_identifier) def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse: @@ -376,60 +402,87 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons identifier_tuple = self.identifier_to_tuple_without_catalog( tuple(table_request.identifier.namespace.root + [table_request.identifier.name]) ) - current_table = self.load_table(identifier_tuple) - database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError) - base_metadata = current_table.metadata - for requirement in table_request.requirements: - requirement.validate(base_metadata) - - updated_metadata = update_table_metadata(base_metadata, table_request.updates) - if updated_metadata == base_metadata: + namespace_tuple = Catalog.namespace_from(identifier_tuple) + namespace = Catalog.namespace_to_string(namespace_tuple) + table_name = Catalog.table_name_from(identifier_tuple) + + current_table: Optional[Table] + try: + current_table = self.load_table(identifier_tuple) + except NoSuchTableError: + current_table = None + + 
updated_staged_table = self._update_and_stage_table(current_table, table_request) + if current_table and updated_staged_table.metadata == current_table.metadata: # no changes, do nothing - return CommitTableResponse(metadata=base_metadata, metadata_location=current_table.metadata_location) - - # write new metadata - new_metadata_version = self._parse_metadata_version(current_table.metadata_location) + 1 - new_metadata_location = self._get_metadata_location(current_table.metadata.location, new_metadata_version) - self._write_metadata(updated_metadata, current_table.io, new_metadata_location) + return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location) + self._write_metadata( + metadata=updated_staged_table.metadata, + io=updated_staged_table.io, + metadata_path=updated_staged_table.metadata_location, + ) with Session(self.engine) as session: - if self.engine.dialect.supports_sane_rowcount: - stmt = ( - update(IcebergTables) - .where( - IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, - IcebergTables.table_name == table_name, - IcebergTables.metadata_location == current_table.metadata_location, - ) - .values(metadata_location=new_metadata_location, previous_metadata_location=current_table.metadata_location) - ) - result = session.execute(stmt) - if result.rowcount < 1: - raise CommitFailedException(f"Table has been updated by another process: {database_name}.{table_name}") - else: - try: - tbl = ( - session.query(IcebergTables) - .with_for_update(of=IcebergTables) - .filter( + if current_table: + # table exists, update it + if self.engine.dialect.supports_sane_rowcount: + stmt = ( + update(IcebergTables) + .where( IcebergTables.catalog_name == self.name, - IcebergTables.table_namespace == database_name, + IcebergTables.table_namespace == namespace, IcebergTables.table_name == table_name, IcebergTables.metadata_location == current_table.metadata_location, ) - .one() + .values( + metadata_location=updated_staged_table.metadata_location, + previous_metadata_location=current_table.metadata_location, + ) ) - tbl.metadata_location = new_metadata_location - tbl.previous_metadata_location = current_table.metadata_location - except NoResultFound as e: - raise CommitFailedException(f"Table has been updated by another process: {database_name}.{table_name}") from e - session.commit() + result = session.execute(stmt) + if result.rowcount < 1: + raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") + else: + try: + tbl = ( + session.query(IcebergTables) + .with_for_update(of=IcebergTables) + .filter( + IcebergTables.catalog_name == self.name, + IcebergTables.table_namespace == namespace, + IcebergTables.table_name == table_name, + IcebergTables.metadata_location == current_table.metadata_location, + ) + .one() + ) + tbl.metadata_location = updated_staged_table.metadata_location + tbl.previous_metadata_location = current_table.metadata_location + except NoResultFound as e: + raise CommitFailedException(f"Table has been updated by another process: {namespace}.{table_name}") from e + session.commit() + else: + # table does not exist, create it + try: + session.add( + IcebergTables( + catalog_name=self.name, + table_namespace=namespace, + table_name=table_name, + metadata_location=updated_staged_table.metadata_location, + previous_metadata_location=None, + ) + ) + session.commit() + except IntegrityError as e: + raise TableAlreadyExistsError(f"Table 
{namespace}.{table_name} already exists") from e - return CommitTableResponse(metadata=updated_metadata, metadata_location=new_metadata_location) + return CommitTableResponse( + metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location + ) def _namespace_exists(self, identifier: Union[str, Identifier]) -> bool: - namespace = self.identifier_to_database(identifier) + namespace_tuple = Catalog.identifier_to_tuple(identifier) + namespace = Catalog.namespace_to_string(namespace_tuple, NoSuchNamespaceError) with Session(self.engine) as session: stmt = ( select(IcebergTables) @@ -462,18 +515,20 @@ def create_namespace(self, namespace: Union[str, Identifier], properties: Proper Raises: NamespaceAlreadyExistsError: If a namespace with the given name already exists. """ + if self._namespace_exists(namespace): + raise NamespaceAlreadyExistsError(f"Namespace {namespace} already exists") + if not properties: properties = IcebergNamespaceProperties.NAMESPACE_MINIMAL_PROPERTIES - database_name = self.identifier_to_database(namespace) - if self._namespace_exists(database_name): - raise NamespaceAlreadyExistsError(f"Database {database_name} already exists") - create_properties = properties if properties else IcebergNamespaceProperties.NAMESPACE_MINIMAL_PROPERTIES with Session(self.engine) as session: for key, value in create_properties.items(): session.add( IcebergNamespaceProperties( - catalog_name=self.name, namespace=database_name, property_key=key, property_value=value + catalog_name=self.name, + namespace=Catalog.namespace_to_string(namespace, NoSuchNamespaceError), + property_key=key, + property_value=value, ) ) session.commit() @@ -488,16 +543,16 @@ def drop_namespace(self, namespace: Union[str, Identifier]) -> None: NoSuchNamespaceError: If a namespace with the given name does not exist. NamespaceNotEmptyError: If the namespace is not empty. """ - database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) - if self._namespace_exists(database_name): - if tables := self.list_tables(database_name): - raise NamespaceNotEmptyError(f"Database {database_name} is not empty. {len(tables)} tables exist.") + if self._namespace_exists(namespace): + namespace_str = Catalog.namespace_to_string(namespace) + if tables := self.list_tables(namespace): + raise NamespaceNotEmptyError(f"Namespace {namespace_str} is not empty. {len(tables)} tables exist.") with Session(self.engine) as session: session.execute( delete(IcebergNamespaceProperties).where( IcebergNamespaceProperties.catalog_name == self.name, - IcebergNamespaceProperties.namespace == database_name, + IcebergNamespaceProperties.namespace == namespace_str, ) ) session.commit() @@ -516,14 +571,14 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. 
""" - database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) + if namespace and not self._namespace_exists(namespace): + raise NoSuchNamespaceError(f"Namespace does not exist: {namespace}") - stmt = select(IcebergTables).where( - IcebergTables.catalog_name == self.name, IcebergTables.table_namespace == database_name - ) + namespace = Catalog.namespace_to_string(namespace) + stmt = select(IcebergTables).where(IcebergTables.catalog_name == self.name, IcebergTables.table_namespace == namespace) with Session(self.engine) as session: result = session.scalars(stmt) - return [(table.table_namespace, table.table_name) for table in result] + return [(Catalog.identifier_to_tuple(table.table_namespace) + (table.table_name,)) for table in result] def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog. @@ -543,15 +598,15 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi table_stmt = select(IcebergTables.table_namespace).where(IcebergTables.catalog_name == self.name) namespace_stmt = select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name == self.name) if namespace: - database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) - table_stmt = table_stmt.where(IcebergTables.table_namespace.like(database_name)) - namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(database_name)) + namespace_str = Catalog.namespace_to_string(namespace, NoSuchNamespaceError) + table_stmt = table_stmt.where(IcebergTables.table_namespace.like(namespace_str)) + namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_str)) stmt = union( table_stmt, namespace_stmt, ) with Session(self.engine) as session: - return [self.identifier_to_tuple(namespace_col) for namespace_col in session.execute(stmt).scalars()] + return [Catalog.identifier_to_tuple(namespace_col) for namespace_col in session.execute(stmt).scalars()] def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: """Get properties for a namespace. @@ -565,12 +620,12 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper Raises: NoSuchNamespaceError: If a namespace with the given name does not exist. """ - database_name = self.identifier_to_database(namespace) - if not self._namespace_exists(database_name): - raise NoSuchNamespaceError(f"Database {database_name} does not exists") + namespace_str = Catalog.namespace_to_string(namespace) + if not self._namespace_exists(namespace): + raise NoSuchNamespaceError(f"Namespace {namespace_str} does not exists") stmt = select(IcebergNamespaceProperties).where( - IcebergNamespaceProperties.catalog_name == self.name, IcebergNamespaceProperties.namespace == database_name + IcebergNamespaceProperties.catalog_name == self.name, IcebergNamespaceProperties.namespace == namespace_str ) with Session(self.engine) as session: result = session.scalars(stmt) @@ -590,9 +645,9 @@ def update_namespace_properties( NoSuchNamespaceError: If a namespace with the given name does not exist. ValueError: If removals and updates have overlapping keys. 
""" - database_name = self.identifier_to_database(namespace) - if not self._namespace_exists(database_name): - raise NoSuchNamespaceError(f"Database {database_name} does not exists") + namespace_str = Catalog.namespace_to_string(namespace) + if not self._namespace_exists(namespace): + raise NoSuchNamespaceError(f"Namespace {namespace_str} does not exists") current_properties = self.load_namespace_properties(namespace=namespace) properties_update_summary = self._get_updated_props_and_update_summary( @@ -603,7 +658,7 @@ def update_namespace_properties( if removals: delete_stmt = delete(IcebergNamespaceProperties).where( IcebergNamespaceProperties.catalog_name == self.name, - IcebergNamespaceProperties.namespace == database_name, + IcebergNamespaceProperties.namespace == namespace_str, IcebergNamespaceProperties.property_key.in_(removals), ) session.execute(delete_stmt) @@ -614,14 +669,14 @@ def update_namespace_properties( # This is not a problem since it runs in a single transaction delete_stmt = delete(IcebergNamespaceProperties).where( IcebergNamespaceProperties.catalog_name == self.name, - IcebergNamespaceProperties.namespace == database_name, + IcebergNamespaceProperties.namespace == namespace_str, IcebergNamespaceProperties.property_key.in_(set(updates.keys())), ) session.execute(delete_stmt) insert_stmt = insert(IcebergNamespaceProperties) for property_key, property_value in updates.items(): insert_stmt = insert_stmt.values( - catalog_name=self.name, namespace=database_name, property_key=property_key, property_value=property_value + catalog_name=self.name, namespace=namespace_str, property_key=property_key, property_value=property_value ) session.execute(insert_stmt) session.commit() diff --git a/pyiceberg/cli/console.py b/pyiceberg/cli/console.py index 0fbda10960..d1833df081 100644 --- a/pyiceberg/cli/console.py +++ b/pyiceberg/cli/console.py @@ -112,9 +112,13 @@ def list(ctx: Context, parent: Optional[str]) -> None: # pylint: disable=redefi """List tables or namespaces.""" catalog, output = _catalog_and_output(ctx) - identifiers = catalog.list_namespaces(parent or ()) - if not identifiers and parent: + identifiers = [] + if parent: + # Do we have tables under parent namespace? identifiers = catalog.list_tables(parent) + if not identifiers: + # List hierarchical namespaces if parent, root namespaces otherwise. + identifiers = catalog.list_namespaces(parent or ()) output.identifiers(identifiers) diff --git a/pyiceberg/exceptions.py b/pyiceberg/exceptions.py index 64356b11a4..c7e37ba7ca 100644 --- a/pyiceberg/exceptions.py +++ b/pyiceberg/exceptions.py @@ -110,3 +110,7 @@ class CommitFailedException(Exception): class CommitStateUnknownException(RESTError): """Commit failed due to unknown reason.""" + + +class WaitingForLockException(Exception): + """Need to wait for a lock, try again.""" diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py index 8873907813..107d2349db 100644 --- a/pyiceberg/expressions/parser.py +++ b/pyiceberg/expressions/parser.py @@ -78,7 +78,7 @@ identifier = Word(alphas, alphanums + "_$").set_results_name("identifier") column = DelimitedList(identifier, delim=".", combine=False).set_results_name("column") -like_regex = r'(?P(?(?(?(? 
BooleanExpression: match = re.search(like_regex, literal_like.value) - if match and match.groupdict()['invalid_wildcard']: + if match and match.groupdict()["invalid_wildcard"]: raise ValueError("LIKE expressions only supports wildcard, '%', at the end of a string") - elif match and match.groupdict()['valid_wildcard']: - return StartsWith(result.column, StringLiteral(literal_like.value[:-1].replace('\\%', '%'))) + elif match and match.groupdict()["valid_wildcard"]: + return StartsWith(result.column, StringLiteral(literal_like.value[:-1].replace("\\%", "%"))) else: - return EqualTo(result.column, StringLiteral(literal_like.value.replace('\\%', '%'))) + return EqualTo(result.column, StringLiteral(literal_like.value.replace("\\%", "%"))) predicate = (comparison | in_check | null_check | nan_check | starts_check | boolean).set_results_name("predicate") diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index 4b5e99d336..36c3e625c8 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -53,10 +53,18 @@ S3_REGION = "s3.region" S3_PROXY_URI = "s3.proxy-uri" S3_CONNECT_TIMEOUT = "s3.connect-timeout" +S3_SIGNER_URI = "s3.signer.uri" HDFS_HOST = "hdfs.host" HDFS_PORT = "hdfs.port" HDFS_USER = "hdfs.user" HDFS_KERB_TICKET = "hdfs.kerberos_ticket" +ADLFS_CONNECTION_STRING = "adlfs.connection-string" +ADLFS_ACCOUNT_NAME = "adlfs.account-name" +ADLFS_ACCOUNT_KEY = "adlfs.account-key" +ADLFS_SAS_TOKEN = "adlfs.sas-token" +ADLFS_TENANT_ID = "adlfs.tenant-id" +ADLFS_CLIENT_ID = "adlfs.client-id" +ADLFS_ClIENT_SECRET = "adlfs.client-secret" GCS_TOKEN = "gcs.oauth2.token" GCS_TOKEN_EXPIRES_AT_MS = "gcs.oauth2.token-expires-at" GCS_PROJECT_ID = "gcs.project-id" @@ -277,6 +285,7 @@ def delete(self, location: Union[str, InputFile, OutputFile]) -> None: "gs": [ARROW_FILE_IO], "file": [ARROW_FILE_IO, FSSPEC_FILE_IO], "hdfs": [ARROW_FILE_IO], + "viewfs": [ARROW_FILE_IO], "abfs": [FSSPEC_FILE_IO], "abfss": [FSSPEC_FILE_IO], } diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index ee97829c2e..bb76f043c9 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -40,6 +40,12 @@ from pyiceberg.catalog import TOKEN from pyiceberg.exceptions import SignError from pyiceberg.io import ( + ADLFS_ACCOUNT_KEY, + ADLFS_ACCOUNT_NAME, + ADLFS_CLIENT_ID, + ADLFS_CONNECTION_STRING, + ADLFS_SAS_TOKEN, + ADLFS_TENANT_ID, GCS_ACCESS, GCS_CACHE_TIMEOUT, GCS_CONSISTENCY, @@ -57,6 +63,8 @@ S3_REGION, S3_SECRET_ACCESS_KEY, S3_SESSION_TOKEN, + S3_SIGNER_URI, + ADLFS_ClIENT_SECRET, FileIO, InputFile, InputStream, @@ -72,7 +80,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A if TOKEN not in properties: raise SignError("Signer set, but token is not available") - signer_url = properties["uri"].rstrip("/") + signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/") signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"} signer_body = { "method": request.method, @@ -163,13 +171,13 @@ def _adlfs(properties: Properties) -> AbstractFileSystem: from adlfs import AzureBlobFileSystem return AzureBlobFileSystem( - connection_string=properties.get("adlfs.connection-string"), - account_name=properties.get("adlfs.account-name"), - account_key=properties.get("adlfs.account-key"), - sas_token=properties.get("adlfs.sas-token"), - tenant_id=properties.get("adlfs.tenant-id"), - client_id=properties.get("adlfs.client-id"), - client_secret=properties.get("adlfs.client-secret"), + connection_string=properties.get(ADLFS_CONNECTION_STRING), + 
account_name=properties.get(ADLFS_ACCOUNT_NAME), + account_key=properties.get(ADLFS_ACCOUNT_KEY), + sas_token=properties.get(ADLFS_SAS_TOKEN), + tenant_id=properties.get(ADLFS_TENANT_ID), + client_id=properties.get(ADLFS_CLIENT_ID), + client_secret=properties.get(ADLFS_ClIENT_SECRET), ) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 72b386d25a..04f30ec63e 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -332,7 +332,7 @@ def parse_location(location: str) -> Tuple[str, str, str]: uri = urlparse(location) if not uri.scheme: return "file", uri.netloc, os.path.abspath(location) - elif uri.scheme == "hdfs": + elif uri.scheme in ("hdfs", "viewfs"): return uri.scheme, uri.netloc, uri.path else: return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}" @@ -356,12 +356,12 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste client_kwargs["connect_timeout"] = float(connect_timeout) return S3FileSystem(**client_kwargs) - elif scheme == "hdfs": + elif scheme in ("hdfs", "viewfs"): from pyarrow.fs import HadoopFileSystem hdfs_kwargs: Dict[str, Any] = {} if netloc: - return HadoopFileSystem.from_uri(f"hdfs://{netloc}") + return HadoopFileSystem.from_uri(f"{scheme}://{netloc}") if host := self.properties.get(HDFS_HOST): hdfs_kwargs["host"] = host if port := self.properties.get(HDFS_PORT): @@ -731,6 +731,16 @@ def _(obj: pa.MapType, visitor: PyArrowSchemaVisitor[T]) -> T: return visitor.map(obj, key_result, value_result) +@visit_pyarrow.register(pa.DictionaryType) +def _(obj: pa.DictionaryType, visitor: PyArrowSchemaVisitor[T]) -> T: + # Parquet has no dictionary type. dictionary-encoding is handled + # as an encoding detail, not as a separate type. + # We will follow this approach in determining the Iceberg Type, + # as we only support parquet in PyIceberg for now. + logger.warning(f"Iceberg does not have a dictionary type. {type(obj)} will be inferred as {obj.value_type} on read.") + return visit_pyarrow(obj.value_type, visitor) + + @visit_pyarrow.register(pa.DataType) def _(obj: pa.DataType, visitor: PyArrowSchemaVisitor[T]) -> T: if pa.types.is_nested(obj): diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py index 3b8138b61a..defe5958c5 100644 --- a/pyiceberg/manifest.py +++ b/pyiceberg/manifest.py @@ -18,6 +18,7 @@ import math from abc import ABC, abstractmethod +from copy import copy from enum import Enum from types import TracebackType from typing import ( @@ -909,7 +910,7 @@ def __init__(self, output_file: OutputFile, snapshot_id: int, parent_snapshot_id self._sequence_number = sequence_number def prepare_manifest(self, manifest_file: ManifestFile) -> ManifestFile: - wrapped_manifest_file = ManifestFile(*manifest_file.record_fields()) + wrapped_manifest_file = copy(manifest_file) if wrapped_manifest_file.sequence_number == UNASSIGNED_SEQ: # if the sequence number is being assigned here, then the manifest must be created by the current operation. 
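The `pyiceberg list` change in `pyiceberg/cli/console.py` above now checks for tables under a parent namespace first and only falls back to listing namespaces when none are found. A minimal sketch of the same logic against the catalog API directly; the catalog name and namespace are placeholders:

```python
from pyiceberg.catalog import load_catalog

catalog = load_catalog("default")

# Mirrors the updated CLI behaviour: with a parent namespace, show its tables
# first and fall back to child namespaces when there are none.
parent = "examples"
identifiers = catalog.list_tables(parent) or catalog.list_namespaces(parent)
```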
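The `S3_SIGNER_URI` and `ADLFS_*` keys added in `pyiceberg/io/__init__.py` above are plain string property names, and `s3v4_rest_signer` now only falls back to the catalog `uri` when `s3.signer.uri` is unset. A rough sketch of supplying them as catalog properties; the catalog name, endpoints, and credentials are placeholders, not part of this change:

```python
from pyiceberg.catalog import load_catalog

# All values are illustrative; the keys mirror the constants in pyiceberg/io/__init__.py.
catalog = load_catalog(
    "default",
    **{
        "uri": "https://rest-catalog.example.com",
        # Used by s3v4_rest_signer for remote signing; when absent, signing requests go to the catalog "uri".
        "s3.signer.uri": "https://signer.example.com/s3",
        # Consumed by the fsspec _adlfs factory through the new ADLFS_* constants.
        "adlfs.account-name": "myaccount",
        "adlfs.account-key": "placeholder-key",
    },
)
```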
diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index a3cf255341..da52d5df8e 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -229,11 +229,11 @@ def partition_to_path(self, data: Record, schema: Schema) -> str: field_strs = [] value_strs = [] - for pos, value in enumerate(data.record_fields()): + for pos in range(len(self.fields)): partition_field = self.fields[pos] - value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=value) + value_str = partition_field.transform.to_human_string(field_types[pos].field_type, value=data[pos]) - value_str = quote(value_str, safe='') + value_str = quote(value_str, safe="") value_strs.append(value_str) field_strs.append(partition_field.name) @@ -387,7 +387,7 @@ def partition(self) -> Record: # partition key transformed with iceberg interna for raw_partition_field_value in self.raw_partition_field_values: partition_fields = self.partition_spec.source_id_to_fields_map[raw_partition_field_value.field.source_id] if len(partition_fields) != 1: - raise ValueError("partition_fields must contain exactly one field.") + raise ValueError(f"Cannot have redundant partitions: {partition_fields}") partition_field = partition_fields[0] iceberg_typed_key_values[partition_field.name] = partition_record_value( partition_field=partition_field, diff --git a/pyiceberg/schema.py b/pyiceberg/schema.py index b2739d8618..77f1addbf5 100644 --- a/pyiceberg/schema.py +++ b/pyiceberg/schema.py @@ -1311,11 +1311,11 @@ def _valid_avro_name(name: str) -> bool: length = len(name) assert length > 0, ValueError("Can not validate empty avro name") first = name[0] - if not (first.isalpha() or first == '_'): + if not (first.isalpha() or first == "_"): return False for character in name[1:]: - if not (character.isalnum() or character == '_'): + if not (character.isalnum() or character == "_"): return False return True @@ -1323,17 +1323,17 @@ def _valid_avro_name(name: str) -> bool: def _sanitize_name(name: str) -> str: sb = [] first = name[0] - if not (first.isalpha() or first == '_'): + if not (first.isalpha() or first == "_"): sb.append(_sanitize_char(first)) else: sb.append(first) for character in name[1:]: - if not (character.isalnum() or character == '_'): + if not (character.isalnum() or character == "_"): sb.append(_sanitize_char(character)) else: sb.append(character) - return ''.join(sb) + return "".join(sb) def _sanitize_char(character: str) -> str: diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 13186c42cc..f160ab2441 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -71,6 +71,7 @@ ManifestEntry, ManifestEntryStatus, ManifestFile, + PartitionFieldSummary, write_manifest, write_manifest_list, ) @@ -251,6 +252,16 @@ def property_as_int(properties: Dict[str, str], property_name: str, default: Opt else: return default + @staticmethod + def property_as_float(properties: Dict[str, str], property_name: str, default: Optional[float] = None) -> Optional[float]: + if value := properties.get(property_name): + try: + return float(value) + except ValueError as e: + raise ValueError(f"Could not parse table property {property_name} to a float: {value}") from e + else: + return default + @staticmethod def property_as_bool(properties: Dict[str, str], property_name: str, default: bool) -> bool: if value := properties.get(property_name): @@ -381,10 +392,11 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) if not isinstance(df, pa.Table): 
raise ValueError(f"Expected PyArrow table, got: {df}") - supported_transforms = {IdentityTransform} - if not all(type(field.transform) in supported_transforms for field in self.table_metadata.spec().fields): + if unsupported_partitions := [ + field for field in self.table_metadata.spec().fields if not field.transform.supports_pyarrow_transform + ]: raise ValueError( - f"All transforms are not supported, expected: {supported_transforms}, but get: {[str(field) for field in self.table_metadata.spec().fields if field.transform not in supported_transforms]}." + f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}." ) _check_schema_compatible(self._table.schema(), other_schema=df.schema) @@ -443,7 +455,7 @@ def overwrite( for data_file in data_files: update_snapshot.append_data_file(data_file) - def add_files(self, file_paths: List[str]) -> None: + def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None: """ Shorthand API for adding files as data files to the table transaction. @@ -455,7 +467,7 @@ def add_files(self, file_paths: List[str]) -> None: """ if self._table.name_mapping() is None: self.set_properties(**{TableProperties.DEFAULT_NAME_MAPPING: self._table.schema().name_mapping.model_dump_json()}) - with self.update_snapshot().fast_append() as update_snapshot: + with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot: data_files = _parquet_files_to_data_files( table_metadata=self._table.metadata, file_paths=file_paths, io=self._table.io ) @@ -557,17 +569,17 @@ def commit_transaction(self) -> Table: class AssignUUIDUpdate(IcebergBaseModel): - action: Literal['assign-uuid'] = Field(default="assign-uuid") + action: Literal["assign-uuid"] = Field(default="assign-uuid") uuid: uuid.UUID class UpgradeFormatVersionUpdate(IcebergBaseModel): - action: Literal['upgrade-format-version'] = Field(default="upgrade-format-version") + action: Literal["upgrade-format-version"] = Field(default="upgrade-format-version") format_version: int = Field(alias="format-version") class AddSchemaUpdate(IcebergBaseModel): - action: Literal['add-schema'] = Field(default="add-schema") + action: Literal["add-schema"] = Field(default="add-schema") schema_: Schema = Field(alias="schema") # This field is required: https://github.com/apache/iceberg/pull/7445 last_column_id: int = Field(alias="last-column-id") @@ -576,47 +588,47 @@ class AddSchemaUpdate(IcebergBaseModel): class SetCurrentSchemaUpdate(IcebergBaseModel): - action: Literal['set-current-schema'] = Field(default="set-current-schema") + action: Literal["set-current-schema"] = Field(default="set-current-schema") schema_id: int = Field( alias="schema-id", description="Schema ID to set as current, or -1 to set last added schema", default=-1 ) class AddPartitionSpecUpdate(IcebergBaseModel): - action: Literal['add-spec'] = Field(default="add-spec") + action: Literal["add-spec"] = Field(default="add-spec") spec: PartitionSpec initial_change: bool = Field(default=False, exclude=True) class SetDefaultSpecUpdate(IcebergBaseModel): - action: Literal['set-default-spec'] = Field(default="set-default-spec") + action: Literal["set-default-spec"] = Field(default="set-default-spec") spec_id: int = Field( alias="spec-id", description="Partition spec ID to set as the default, or -1 to set last added spec", default=-1 ) class AddSortOrderUpdate(IcebergBaseModel): - action: Literal['add-sort-order'] = 
Field(default="add-sort-order") + action: Literal["add-sort-order"] = Field(default="add-sort-order") sort_order: SortOrder = Field(alias="sort-order") initial_change: bool = Field(default=False, exclude=True) class SetDefaultSortOrderUpdate(IcebergBaseModel): - action: Literal['set-default-sort-order'] = Field(default="set-default-sort-order") + action: Literal["set-default-sort-order"] = Field(default="set-default-sort-order") sort_order_id: int = Field( alias="sort-order-id", description="Sort order ID to set as the default, or -1 to set last added sort order", default=-1 ) class AddSnapshotUpdate(IcebergBaseModel): - action: Literal['add-snapshot'] = Field(default="add-snapshot") + action: Literal["add-snapshot"] = Field(default="add-snapshot") snapshot: Snapshot class SetSnapshotRefUpdate(IcebergBaseModel): - action: Literal['set-snapshot-ref'] = Field(default="set-snapshot-ref") + action: Literal["set-snapshot-ref"] = Field(default="set-snapshot-ref") ref_name: str = Field(alias="ref-name") type: Literal["tag", "branch"] snapshot_id: int = Field(alias="snapshot-id") @@ -626,31 +638,31 @@ class SetSnapshotRefUpdate(IcebergBaseModel): class RemoveSnapshotsUpdate(IcebergBaseModel): - action: Literal['remove-snapshots'] = Field(default="remove-snapshots") + action: Literal["remove-snapshots"] = Field(default="remove-snapshots") snapshot_ids: List[int] = Field(alias="snapshot-ids") class RemoveSnapshotRefUpdate(IcebergBaseModel): - action: Literal['remove-snapshot-ref'] = Field(default="remove-snapshot-ref") + action: Literal["remove-snapshot-ref"] = Field(default="remove-snapshot-ref") ref_name: str = Field(alias="ref-name") class SetLocationUpdate(IcebergBaseModel): - action: Literal['set-location'] = Field(default="set-location") + action: Literal["set-location"] = Field(default="set-location") location: str class SetPropertiesUpdate(IcebergBaseModel): - action: Literal['set-properties'] = Field(default="set-properties") + action: Literal["set-properties"] = Field(default="set-properties") updates: Dict[str, str] - @field_validator('updates', mode='before') + @field_validator("updates", mode="before") def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: return transform_dict_value_to_str(properties) class RemovePropertiesUpdate(IcebergBaseModel): - action: Literal['remove-properties'] = Field(default="remove-properties") + action: Literal["remove-properties"] = Field(default="remove-properties") removals: List[str] @@ -672,7 +684,7 @@ class RemovePropertiesUpdate(IcebergBaseModel): SetPropertiesUpdate, RemovePropertiesUpdate, ], - Field(discriminator='action'), + Field(discriminator="action"), ] @@ -1131,7 +1143,7 @@ def validate(self, base_metadata: Optional[TableMetadata]) -> None: AssertDefaultSpecId, AssertDefaultSortOrderId, ], - Field(discriminator='type'), + Field(discriminator="type"), ] UpdatesAndRequirements = Tuple[Tuple[TableUpdate, ...], Tuple[TableRequirement, ...]] @@ -1142,7 +1154,7 @@ class Namespace(IcebergRootModel[List[str]]): root: List[str] = Field( ..., - description='Reference to one or more levels of a namespace', + description="Reference to one or more levels of a namespace", ) @@ -1341,7 +1353,7 @@ def overwrite( with self.transaction() as tx: tx.overwrite(df=df, overwrite_filter=overwrite_filter, snapshot_properties=snapshot_properties) - def add_files(self, file_paths: List[str]) -> None: + def add_files(self, file_paths: List[str], snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None: """ Shorthand API for adding 
files as data files to the table. @@ -1352,7 +1364,7 @@ def add_files(self, file_paths: List[str]) -> None: FileNotFoundError: If the file does not exist. """ with self.transaction() as tx: - tx.add_files(file_paths=file_paths) + tx.add_files(file_paths=file_paths, snapshot_properties=snapshot_properties) def update_spec(self, case_sensitive: bool = True) -> UpdateSpec: return UpdateSpec(Transaction(self, autocommit=True), case_sensitive=case_sensitive) @@ -1782,7 +1794,7 @@ class Move: other_field_id: Optional[int] = None -U = TypeVar('U') +U = TypeVar("U") class UpdateTableMetadata(ABC, Generic[U]): @@ -2671,13 +2683,13 @@ class AddFileTask: def _new_manifest_path(location: str, num: int, commit_uuid: uuid.UUID) -> str: - return f'{location}/metadata/{commit_uuid}-m{num}.avro' + return f"{location}/metadata/{commit_uuid}-m{num}.avro" def _generate_manifest_list_path(location: str, snapshot_id: int, attempt: int, commit_uuid: uuid.UUID) -> str: # Mimics the behavior in Java: # https://github.com/apache/iceberg/blob/c862b9177af8e2d83122220764a056f3b96fd00c/core/src/main/java/org/apache/iceberg/SnapshotProducer.java#L491 - return f'{location}/metadata/snap-{snapshot_id}-{attempt}-{commit_uuid}.avro' + return f"{location}/metadata/snap-{snapshot_id}-{attempt}-{commit_uuid}.avro" def _dataframe_to_data_files( @@ -3231,7 +3243,7 @@ def _partition_field(self, transform_key: Tuple[int, Transform[Any, Any]], name: new_field_id = self._new_field_id() if name is None: - tmp_field = PartitionField(transform_key[0], new_field_id, transform_key[1], 'unassigned_field_name') + tmp_field = PartitionField(transform_key[0], new_field_id, transform_key[1], "unassigned_field_name") name = _visit_partition_field(self._transaction.table_metadata.schema(), tmp_field, _PartitionNameGenerator()) return PartitionField(transform_key[0], new_field_id, transform_key[1], name) @@ -3270,12 +3282,12 @@ def snapshots(self) -> "pa.Table": import pyarrow as pa snapshots_schema = pa.schema([ - pa.field('committed_at', pa.timestamp(unit='ms'), nullable=False), - pa.field('snapshot_id', pa.int64(), nullable=False), - pa.field('parent_id', pa.int64(), nullable=True), - pa.field('operation', pa.string(), nullable=True), - pa.field('manifest_list', pa.string(), nullable=False), - pa.field('summary', pa.map_(pa.string(), pa.string()), nullable=True), + pa.field("committed_at", pa.timestamp(unit="ms"), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("parent_id", pa.int64(), nullable=True), + pa.field("operation", pa.string(), nullable=True), + pa.field("manifest_list", pa.string(), nullable=False), + pa.field("summary", pa.map_(pa.string(), pa.string()), nullable=True), ]) snapshots = [] for snapshot in self.tbl.metadata.snapshots: @@ -3287,12 +3299,12 @@ def snapshots(self) -> "pa.Table": additional_properties = None snapshots.append({ - 'committed_at': datetime.utcfromtimestamp(snapshot.timestamp_ms / 1000.0), - 'snapshot_id': snapshot.snapshot_id, - 'parent_id': snapshot.parent_snapshot_id, - 'operation': str(operation), - 'manifest_list': snapshot.manifest_list, - 'summary': additional_properties, + "committed_at": datetime.utcfromtimestamp(snapshot.timestamp_ms / 1000.0), + "snapshot_id": snapshot.snapshot_id, + "parent_id": snapshot.parent_snapshot_id, + "operation": str(operation), + "manifest_list": snapshot.manifest_list, + "summary": additional_properties, }) return pa.Table.from_pylist( @@ -3329,33 +3341,33 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: 
pa_record_struct = schema_to_pyarrow(partition_record) entries_schema = pa.schema([ - pa.field('status', pa.int8(), nullable=False), - pa.field('snapshot_id', pa.int64(), nullable=False), - pa.field('sequence_number', pa.int64(), nullable=False), - pa.field('file_sequence_number', pa.int64(), nullable=False), + pa.field("status", pa.int8(), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("sequence_number", pa.int64(), nullable=False), + pa.field("file_sequence_number", pa.int64(), nullable=False), pa.field( - 'data_file', + "data_file", pa.struct([ - pa.field('content', pa.int8(), nullable=False), - pa.field('file_path', pa.string(), nullable=False), - pa.field('file_format', pa.string(), nullable=False), - pa.field('partition', pa_record_struct, nullable=False), - pa.field('record_count', pa.int64(), nullable=False), - pa.field('file_size_in_bytes', pa.int64(), nullable=False), - pa.field('column_sizes', pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field('value_counts', pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field('null_value_counts', pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field('nan_value_counts', pa.map_(pa.int32(), pa.int64()), nullable=True), - pa.field('lower_bounds', pa.map_(pa.int32(), pa.binary()), nullable=True), - pa.field('upper_bounds', pa.map_(pa.int32(), pa.binary()), nullable=True), - pa.field('key_metadata', pa.binary(), nullable=True), - pa.field('split_offsets', pa.list_(pa.int64()), nullable=True), - pa.field('equality_ids', pa.list_(pa.int32()), nullable=True), - pa.field('sort_order_id', pa.int32(), nullable=True), + pa.field("content", pa.int8(), nullable=False), + pa.field("file_path", pa.string(), nullable=False), + pa.field("file_format", pa.string(), nullable=False), + pa.field("partition", pa_record_struct, nullable=False), + pa.field("record_count", pa.int64(), nullable=False), + pa.field("file_size_in_bytes", pa.int64(), nullable=False), + pa.field("column_sizes", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("null_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("nan_value_counts", pa.map_(pa.int32(), pa.int64()), nullable=True), + pa.field("lower_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), + pa.field("upper_bounds", pa.map_(pa.int32(), pa.binary()), nullable=True), + pa.field("key_metadata", pa.binary(), nullable=True), + pa.field("split_offsets", pa.list_(pa.int64()), nullable=True), + pa.field("equality_ids", pa.list_(pa.int32()), nullable=True), + pa.field("sort_order_id", pa.int32(), nullable=True), ]), nullable=False, ), - pa.field('readable_metrics', pa.struct(readable_metrics_struct), nullable=True), + pa.field("readable_metrics", pa.struct(readable_metrics_struct), nullable=True), ]) entries = [] @@ -3392,11 +3404,11 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: } entries.append({ - 'status': entry.status.value, - 'snapshot_id': entry.snapshot_id, - 'sequence_number': entry.data_sequence_number, - 'file_sequence_number': entry.file_sequence_number, - 'data_file': { + "status": entry.status.value, + "snapshot_id": entry.snapshot_id, + "sequence_number": entry.data_sequence_number, + "file_sequence_number": entry.file_sequence_number, + "data_file": { "content": entry.data_file.content, "file_path": entry.data_file.file_path, "file_format": entry.data_file.file_format, @@ -3415,7 +3427,7 @@ def 
_readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType: "sort_order_id": entry.data_file.sort_order_id, "spec_id": entry.data_file.spec_id, }, - 'readable_metrics': readable_metrics, + "readable_metrics": readable_metrics, }) return pa.Table.from_pylist( @@ -3427,24 +3439,24 @@ def refs(self) -> "pa.Table": import pyarrow as pa ref_schema = pa.schema([ - pa.field('name', pa.string(), nullable=False), - pa.field('type', pa.dictionary(pa.int32(), pa.string()), nullable=False), - pa.field('snapshot_id', pa.int64(), nullable=False), - pa.field('max_reference_age_in_ms', pa.int64(), nullable=True), - pa.field('min_snapshots_to_keep', pa.int32(), nullable=True), - pa.field('max_snapshot_age_in_ms', pa.int64(), nullable=True), + pa.field("name", pa.string(), nullable=False), + pa.field("type", pa.dictionary(pa.int32(), pa.string()), nullable=False), + pa.field("snapshot_id", pa.int64(), nullable=False), + pa.field("max_reference_age_in_ms", pa.int64(), nullable=True), + pa.field("min_snapshots_to_keep", pa.int32(), nullable=True), + pa.field("max_snapshot_age_in_ms", pa.int64(), nullable=True), ]) ref_results = [] for ref in self.tbl.metadata.refs: if snapshot_ref := self.tbl.metadata.refs.get(ref): ref_results.append({ - 'name': ref, - 'type': snapshot_ref.snapshot_ref_type.upper(), - 'snapshot_id': snapshot_ref.snapshot_id, - 'max_reference_age_in_ms': snapshot_ref.max_ref_age_ms, - 'min_snapshots_to_keep': snapshot_ref.min_snapshots_to_keep, - 'max_snapshot_age_in_ms': snapshot_ref.max_snapshot_age_ms, + "name": ref, + "type": snapshot_ref.snapshot_ref_type.upper(), + "snapshot_id": snapshot_ref.snapshot_id, + "max_reference_age_in_ms": snapshot_ref.max_ref_age_ms, + "min_snapshots_to_keep": snapshot_ref.min_snapshots_to_keep, + "max_snapshot_age_in_ms": snapshot_ref.max_snapshot_age_ms, }) return pa.Table.from_pylist(ref_results, schema=ref_schema) @@ -3455,15 +3467,15 @@ def partitions(self, snapshot_id: Optional[int] = None) -> "pa.Table": from pyiceberg.io.pyarrow import schema_to_pyarrow table_schema = pa.schema([ - pa.field('record_count', pa.int64(), nullable=False), - pa.field('file_count', pa.int32(), nullable=False), - pa.field('total_data_file_size_in_bytes', pa.int64(), nullable=False), - pa.field('position_delete_record_count', pa.int64(), nullable=False), - pa.field('position_delete_file_count', pa.int32(), nullable=False), - pa.field('equality_delete_record_count', pa.int64(), nullable=False), - pa.field('equality_delete_file_count', pa.int32(), nullable=False), - pa.field('last_updated_at', pa.timestamp(unit='ms'), nullable=True), - pa.field('last_updated_snapshot_id', pa.int64(), nullable=True), + pa.field("record_count", pa.int64(), nullable=False), + pa.field("file_count", pa.int32(), nullable=False), + pa.field("total_data_file_size_in_bytes", pa.int64(), nullable=False), + pa.field("position_delete_record_count", pa.int64(), nullable=False), + pa.field("position_delete_file_count", pa.int32(), nullable=False), + pa.field("equality_delete_record_count", pa.int64(), nullable=False), + pa.field("equality_delete_file_count", pa.int32(), nullable=False), + pa.field("last_updated_at", pa.timestamp(unit="ms"), nullable=True), + pa.field("last_updated_snapshot_id", pa.int64(), nullable=True), ]) partition_record = self.tbl.metadata.specs_struct() @@ -3472,8 +3484,8 @@ def partitions(self, snapshot_id: Optional[int] = None) -> "pa.Table": if has_partitions: pa_record_struct = schema_to_pyarrow(partition_record) partitions_schema = pa.schema([ - pa.field('partition', 
pa_record_struct, nullable=False), - pa.field('spec_id', pa.int32(), nullable=False), + pa.field("partition", pa_record_struct, nullable=False), + pa.field("spec_id", pa.int32(), nullable=False), ]) table_schema = pa.unify_schemas([partitions_schema, table_schema]) @@ -3537,38 +3549,99 @@ def update_partitions_map( schema=table_schema, ) + def manifests(self) -> "pa.Table": + import pyarrow as pa -@dataclass(frozen=True) -class TablePartition: - partition_key: PartitionKey - arrow_table_partition: pa.Table + from pyiceberg.conversions import from_bytes + partition_summary_schema = pa.struct([ + pa.field("contains_null", pa.bool_(), nullable=False), + pa.field("contains_nan", pa.bool_(), nullable=True), + pa.field("lower_bound", pa.string(), nullable=True), + pa.field("upper_bound", pa.string(), nullable=True), + ]) -def _get_partition_sort_order(partition_columns: list[str], reverse: bool = False) -> dict[str, Any]: - order = 'ascending' if not reverse else 'descending' - null_placement = 'at_start' if reverse else 'at_end' - return {'sort_keys': [(column_name, order) for column_name in partition_columns], 'null_placement': null_placement} + manifest_schema = pa.schema([ + pa.field("content", pa.int8(), nullable=False), + pa.field("path", pa.string(), nullable=False), + pa.field("length", pa.int64(), nullable=False), + pa.field("partition_spec_id", pa.int32(), nullable=False), + pa.field("added_snapshot_id", pa.int64(), nullable=False), + pa.field("added_data_files_count", pa.int32(), nullable=False), + pa.field("existing_data_files_count", pa.int32(), nullable=False), + pa.field("deleted_data_files_count", pa.int32(), nullable=False), + pa.field("added_delete_files_count", pa.int32(), nullable=False), + pa.field("existing_delete_files_count", pa.int32(), nullable=False), + pa.field("deleted_delete_files_count", pa.int32(), nullable=False), + pa.field("partition_summaries", pa.list_(partition_summary_schema), nullable=False), + ]) + def _partition_summaries_to_rows( + spec: PartitionSpec, partition_summaries: List[PartitionFieldSummary] + ) -> List[Dict[str, Any]]: + rows = [] + for i, field_summary in enumerate(partition_summaries): + field = spec.fields[i] + partition_field_type = spec.partition_type(self.tbl.schema()).fields[i].field_type + lower_bound = ( + ( + field.transform.to_human_string( + partition_field_type, from_bytes(partition_field_type, field_summary.lower_bound) + ) + ) + if field_summary.lower_bound + else None + ) + upper_bound = ( + ( + field.transform.to_human_string( + partition_field_type, from_bytes(partition_field_type, field_summary.upper_bound) + ) + ) + if field_summary.upper_bound + else None + ) + rows.append({ + "contains_null": field_summary.contains_null, + "contains_nan": field_summary.contains_nan, + "lower_bound": lower_bound, + "upper_bound": upper_bound, + }) + return rows -def group_by_partition_scheme(arrow_table: pa.Table, partition_columns: list[str]) -> pa.Table: - """Given a table, sort it by current partition scheme.""" - # only works for identity for now - sort_options = _get_partition_sort_order(partition_columns, reverse=False) - sorted_arrow_table = arrow_table.sort_by(sorting=sort_options['sort_keys'], null_placement=sort_options['null_placement']) - return sorted_arrow_table + specs = self.tbl.metadata.specs() + manifests = [] + if snapshot := self.tbl.metadata.current_snapshot(): + for manifest in snapshot.manifests(self.tbl.io): + is_data_file = manifest.content == ManifestContent.DATA + is_delete_file = manifest.content == 
ManifestContent.DELETES + manifests.append({ + "content": manifest.content, + "path": manifest.manifest_path, + "length": manifest.manifest_length, + "partition_spec_id": manifest.partition_spec_id, + "added_snapshot_id": manifest.added_snapshot_id, + "added_data_files_count": manifest.added_files_count if is_data_file else 0, + "existing_data_files_count": manifest.existing_files_count if is_data_file else 0, + "deleted_data_files_count": manifest.deleted_files_count if is_data_file else 0, + "added_delete_files_count": manifest.added_files_count if is_delete_file else 0, + "existing_delete_files_count": manifest.existing_files_count if is_delete_file else 0, + "deleted_delete_files_count": manifest.deleted_files_count if is_delete_file else 0, + "partition_summaries": _partition_summaries_to_rows(specs[manifest.partition_spec_id], manifest.partitions) + if manifest.partitions + else [], + }) + + return pa.Table.from_pylist( + manifests, + schema=manifest_schema, + ) -def get_partition_columns( - spec: PartitionSpec, - schema: Schema, -) -> list[str]: - partition_cols = [] - for partition_field in spec.fields: - column_name = schema.find_column_name(partition_field.source_id) - if not column_name: - raise ValueError(f"{partition_field=} could not be found in {schema}.") - partition_cols.append(column_name) - return partition_cols +@dataclass(frozen=True) +class TablePartition: + partition_key: PartitionKey + arrow_table_partition: pa.Table def _get_table_partitions( @@ -3577,7 +3650,7 @@ def _get_table_partitions( schema: Schema, slice_instructions: list[dict[str, Any]], ) -> list[TablePartition]: - sorted_slice_instructions = sorted(slice_instructions, key=lambda x: x['offset']) + sorted_slice_instructions = sorted(slice_instructions, key=lambda x: x["offset"]) partition_fields = partition_spec.fields @@ -3625,13 +3698,30 @@ def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.T """ import pyarrow as pa - partition_columns = get_partition_columns(spec=spec, schema=schema) - arrow_table = group_by_partition_scheme(arrow_table, partition_columns) - - reversing_sort_order_options = _get_partition_sort_order(partition_columns, reverse=True) - reversed_indices = pa.compute.sort_indices(arrow_table, **reversing_sort_order_options).to_pylist() - - slice_instructions: list[dict[str, Any]] = [] + partition_columns: List[Tuple[PartitionField, NestedField]] = [ + (partition_field, schema.find_field(partition_field.source_id)) for partition_field in spec.fields + ] + partition_values_table = pa.table({ + str(partition.field_id): partition.transform.pyarrow_transform(field.field_type)(arrow_table[field.name]) + for partition, field in partition_columns + }) + + # Sort by partitions + sort_indices = pa.compute.sort_indices( + partition_values_table, + sort_keys=[(col, "ascending") for col in partition_values_table.column_names], + null_placement="at_end", + ).to_pylist() + arrow_table = arrow_table.take(sort_indices) + + # Get slice_instructions to group by partitions + partition_values_table = partition_values_table.take(sort_indices) + reversed_indices = pa.compute.sort_indices( + partition_values_table, + sort_keys=[(col, "descending") for col in partition_values_table.column_names], + null_placement="at_start", + ).to_pylist() + slice_instructions: List[Dict[str, Any]] = [] last = len(reversed_indices) reversed_indices_size = len(reversed_indices) ptr = 0 @@ -3642,6 +3732,6 @@ def _determine_partitions(spec: PartitionSpec, schema: Schema, arrow_table: pa.T last = 
reversed_indices[ptr] ptr = ptr + group_size - table_partitions: list[TablePartition] = _get_table_partitions(arrow_table, spec, schema, slice_instructions) + table_partitions: List[TablePartition] = _get_table_partitions(arrow_table, spec, schema, slice_instructions) return table_partitions diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index ba0c885758..8c3c389318 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -222,7 +222,7 @@ class TableMetadataCommonFields(IcebergBaseModel): current-snapshot-id even if the refs map is null.""" # validators - @field_validator('properties', mode='before') + @field_validator("properties", mode="before") def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: return transform_dict_value_to_str(properties) @@ -305,7 +305,7 @@ def sort_order_by_id(self, sort_order_id: int) -> Optional[SortOrder]: """Get the sort order by sort_order_id.""" return next((sort_order for sort_order in self.sort_orders if sort_order.order_id == sort_order_id), None) - @field_serializer('current_snapshot_id') + @field_serializer("current_snapshot_id") def serialize_current_snapshot_id(self, current_snapshot_id: Optional[int]) -> Optional[int]: if current_snapshot_id is None and Config().get_bool("legacy-current-snapshot-id"): return -1 @@ -319,7 +319,7 @@ def _generate_snapshot_id() -> int: """ rnd_uuid = uuid.uuid4() snapshot_id = int.from_bytes( - bytes(lhs ^ rhs for lhs, rhs in zip(rnd_uuid.bytes[0:8], rnd_uuid.bytes[8:16])), byteorder='little', signed=True + bytes(lhs ^ rhs for lhs, rhs in zip(rnd_uuid.bytes[0:8], rnd_uuid.bytes[8:16])), byteorder="little", signed=True ) snapshot_id = snapshot_id if snapshot_id >= 0 else snapshot_id * -1 diff --git a/pyiceberg/table/name_mapping.py b/pyiceberg/table/name_mapping.py index baa15f168d..5a4e769003 100644 --- a/pyiceberg/table/name_mapping.py +++ b/pyiceberg/table/name_mapping.py @@ -40,12 +40,12 @@ class MappedField(IcebergBaseModel): names: List[str] = conlist(str, min_length=1) fields: List[MappedField] = Field(default_factory=list) - @field_validator('fields', mode='before') + @field_validator("fields", mode="before") @classmethod def convert_null_to_empty_List(cls, v: Any) -> Any: return v or [] - @field_validator('names', mode='after') + @field_validator("names", mode="after") @classmethod def check_at_least_one(cls, v: List[str]) -> Any: """ @@ -60,10 +60,10 @@ def check_at_least_one(cls, v: List[str]) -> Any: @model_serializer def ser_model(self) -> Dict[str, Any]: """Set custom serializer to leave out the field when it is empty.""" - fields = {'fields': self.fields} if len(self.fields) > 0 else {} + fields = {"fields": self.fields} if len(self.fields) > 0 else {} return { - 'field-id': self.field_id, - 'names': self.names, + "field-id": self.field_id, + "names": self.names, **fields, } @@ -87,7 +87,7 @@ def _field_by_name(self) -> Dict[str, MappedField]: return visit_name_mapping(self, _IndexByName()) def find(self, *names: str) -> MappedField: - name = '.'.join(names) + name = ".".join(names) try: return self._field_by_name[name] except KeyError as e: @@ -109,7 +109,7 @@ def __str__(self) -> str: return "[\n " + "\n ".join([str(e) for e in self.root]) + "\n]" -S = TypeVar('S') +S = TypeVar("S") T = TypeVar("T") diff --git a/pyiceberg/table/refs.py b/pyiceberg/table/refs.py index df18fadd31..d87a319a16 100644 --- a/pyiceberg/table/refs.py +++ b/pyiceberg/table/refs.py @@ -46,14 +46,14 @@ class SnapshotRef(IcebergBaseModel): 
max_snapshot_age_ms: Annotated[Optional[int], Field(alias="max-snapshot-age-ms", default=None, gt=0)] max_ref_age_ms: Annotated[Optional[int], Field(alias="max-ref-age-ms", default=None, gt=0)] - @model_validator(mode='after') - def check_min_snapshots_to_keep(self) -> 'SnapshotRef': + @model_validator(mode="after") + def check_min_snapshots_to_keep(self) -> "SnapshotRef": if self.min_snapshots_to_keep is not None and self.snapshot_ref_type == SnapshotRefType.TAG: raise ValidationError("Tags do not support setting minSnapshotsToKeep") return self - @model_validator(mode='after') - def check_max_snapshot_age_ms(self) -> 'SnapshotRef': + @model_validator(mode="after") + def check_max_snapshot_age_ms(self) -> "SnapshotRef": if self.max_snapshot_age_ms is not None and self.snapshot_ref_type == SnapshotRefType.TAG: raise ValidationError("Tags do not support setting maxSnapshotAgeMs") return self diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py index f74ac4b7d4..e2ce3fe4f1 100644 --- a/pyiceberg/table/snapshots.py +++ b/pyiceberg/table/snapshots.py @@ -27,29 +27,29 @@ from pyiceberg.schema import Schema from pyiceberg.typedef import IcebergBaseModel -ADDED_DATA_FILES = 'added-data-files' -ADDED_DELETE_FILES = 'added-delete-files' -ADDED_EQUALITY_DELETES = 'added-equality-deletes' -ADDED_FILE_SIZE = 'added-files-size' -ADDED_POSITION_DELETES = 'added-position-deletes' -ADDED_POSITION_DELETE_FILES = 'added-position-delete-files' -ADDED_RECORDS = 'added-records' -DELETED_DATA_FILES = 'deleted-data-files' -DELETED_RECORDS = 'deleted-records' -ADDED_EQUALITY_DELETE_FILES = 'added-equality-delete-files' -REMOVED_DELETE_FILES = 'removed-delete-files' -REMOVED_EQUALITY_DELETES = 'removed-equality-deletes' -REMOVED_EQUALITY_DELETE_FILES = 'removed-equality-delete-files' -REMOVED_FILE_SIZE = 'removed-files-size' -REMOVED_POSITION_DELETES = 'removed-position-deletes' -REMOVED_POSITION_DELETE_FILES = 'removed-position-delete-files' -TOTAL_EQUALITY_DELETES = 'total-equality-deletes' -TOTAL_POSITION_DELETES = 'total-position-deletes' -TOTAL_DATA_FILES = 'total-data-files' -TOTAL_DELETE_FILES = 'total-delete-files' -TOTAL_RECORDS = 'total-records' -TOTAL_FILE_SIZE = 'total-files-size' -CHANGED_PARTITION_COUNT_PROP = 'changed-partition-count' +ADDED_DATA_FILES = "added-data-files" +ADDED_DELETE_FILES = "added-delete-files" +ADDED_EQUALITY_DELETES = "added-equality-deletes" +ADDED_FILE_SIZE = "added-files-size" +ADDED_POSITION_DELETES = "added-position-deletes" +ADDED_POSITION_DELETE_FILES = "added-position-delete-files" +ADDED_RECORDS = "added-records" +DELETED_DATA_FILES = "deleted-data-files" +DELETED_RECORDS = "deleted-records" +ADDED_EQUALITY_DELETE_FILES = "added-equality-delete-files" +REMOVED_DELETE_FILES = "removed-delete-files" +REMOVED_EQUALITY_DELETES = "removed-equality-deletes" +REMOVED_EQUALITY_DELETE_FILES = "removed-equality-delete-files" +REMOVED_FILE_SIZE = "removed-files-size" +REMOVED_POSITION_DELETES = "removed-position-deletes" +REMOVED_POSITION_DELETE_FILES = "removed-position-delete-files" +TOTAL_EQUALITY_DELETES = "total-equality-deletes" +TOTAL_POSITION_DELETES = "total-position-deletes" +TOTAL_DATA_FILES = "total-data-files" +TOTAL_DELETE_FILES = "total-delete-files" +TOTAL_RECORDS = "total-records" +TOTAL_FILE_SIZE = "total-files-size" +CHANGED_PARTITION_COUNT_PROP = "changed-partition-count" CHANGED_PARTITION_PREFIX = "partitions." 
OPERATION = "operation" @@ -181,14 +181,14 @@ def __init__(self, operation: Operation, **data: Any) -> None: def __getitem__(self, __key: str) -> Optional[Any]: # type: ignore """Return a key as it is a map.""" - if __key.lower() == 'operation': + if __key.lower() == "operation": return self.operation else: return self._additional_properties.get(__key) def __setitem__(self, key: str, value: Any) -> None: """Set a key as it is a map.""" - if key.lower() == 'operation': + if key.lower() == "operation": self.operation = value else: self._additional_properties[key] = value @@ -274,14 +274,14 @@ def set_partition_summary_limit(self, limit: int) -> None: def add_file(self, data_file: DataFile, schema: Schema, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC) -> None: self.metrics.add_file(data_file) - if len(data_file.partition.record_fields()) != 0: + if len(data_file.partition) > 0: self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=True, schema=schema) def remove_file( self, data_file: DataFile, schema: Schema, partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC ) -> None: self.metrics.remove_file(data_file) - if len(data_file.partition.record_fields()) != 0: + if len(data_file.partition) > 0: self.update_partition_metrics(partition_spec=partition_spec, file=data_file, is_add_file=False, schema=schema) def update_partition_metrics(self, partition_spec: PartitionSpec, file: DataFile, is_add_file: bool, schema: Schema) -> None: @@ -317,10 +317,10 @@ def _truncate_table_summary(summary: Summary, previous_summary: Mapping[str, str TOTAL_POSITION_DELETES, TOTAL_EQUALITY_DELETES, }: - summary[prop] = '0' + summary[prop] = "0" def get_prop(prop: str) -> int: - value = previous_summary.get(prop) or '0' + value = previous_summary.get(prop) or "0" try: return int(value) except ValueError as e: @@ -353,12 +353,12 @@ def update_snapshot_summaries( if not previous_summary: previous_summary = { - TOTAL_DATA_FILES: '0', - TOTAL_DELETE_FILES: '0', - TOTAL_RECORDS: '0', - TOTAL_FILE_SIZE: '0', - TOTAL_POSITION_DELETES: '0', - TOTAL_EQUALITY_DELETES: '0', + TOTAL_DATA_FILES: "0", + TOTAL_DELETE_FILES: "0", + TOTAL_RECORDS: "0", + TOTAL_FILE_SIZE: "0", + TOTAL_POSITION_DELETES: "0", + TOTAL_EQUALITY_DELETES: "0", } def _update_totals(total_property: str, added_property: str, removed_property: str) -> None: diff --git a/pyiceberg/transforms.py b/pyiceberg/transforms.py index 6dcae59e49..38cc6221a2 100644 --- a/pyiceberg/transforms.py +++ b/pyiceberg/transforms.py @@ -20,7 +20,7 @@ from abc import ABC, abstractmethod from enum import IntEnum from functools import singledispatch -from typing import Any, Callable, Generic, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Callable, Generic, Optional, TypeVar from typing import Literal as LiteralType from uuid import UUID @@ -82,6 +82,9 @@ from pyiceberg.utils.parsing import ParseNumberFromBrackets from pyiceberg.utils.singleton import Singleton +if TYPE_CHECKING: + import pyarrow as pa + S = TypeVar("S") T = TypeVar("T") @@ -175,6 +178,13 @@ def __eq__(self, other: Any) -> bool: return self.root == other.root return False + @property + def supports_pyarrow_transform(self) -> bool: + return False + + @abstractmethod + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": ... + class BucketTransform(Transform[S, int]): """Base Transform class to transform a value into a bucket partition value. 
@@ -290,6 +300,9 @@ def __repr__(self) -> str: """Return the string representation of the BucketTransform class.""" return f"BucketTransform(num_buckets={self._num_buckets})" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + raise NotImplementedError() + class TimeResolution(IntEnum): YEAR = 6 @@ -349,6 +362,10 @@ def dedup_name(self) -> str: def preserves_order(self) -> bool: return True + @property + def supports_pyarrow_transform(self) -> bool: + return True + class YearTransform(TimeTransform[S]): """Transforms a datetime value into a year value. @@ -391,6 +408,21 @@ def __repr__(self) -> str: """Return the string representation of the YearTransform class.""" return "YearTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + import pyarrow as pa + import pyarrow.compute as pc + + if isinstance(source, DateType): + epoch = datetime.EPOCH_DATE + elif isinstance(source, TimestampType): + epoch = datetime.EPOCH_TIMESTAMP + elif isinstance(source, TimestamptzType): + epoch = datetime.EPOCH_TIMESTAMPTZ + else: + raise ValueError(f"Cannot apply year transform for type: {source}") + + return lambda v: pc.years_between(pa.scalar(epoch), v) if v is not None else None + class MonthTransform(TimeTransform[S]): """Transforms a datetime value into a month value. @@ -433,6 +465,27 @@ def __repr__(self) -> str: """Return the string representation of the MonthTransform class.""" return "MonthTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + import pyarrow as pa + import pyarrow.compute as pc + + if isinstance(source, DateType): + epoch = datetime.EPOCH_DATE + elif isinstance(source, TimestampType): + epoch = datetime.EPOCH_TIMESTAMP + elif isinstance(source, TimestamptzType): + epoch = datetime.EPOCH_TIMESTAMPTZ + else: + raise ValueError(f"Cannot apply month transform for type: {source}") + + def month_func(v: pa.Array) -> pa.Array: + return pc.add( + pc.multiply(pc.years_between(pa.scalar(epoch), v), pa.scalar(12)), + pc.add(pc.month(v), pa.scalar(-1)), + ) + + return lambda v: month_func(v) if v is not None else None + class DayTransform(TimeTransform[S]): """Transforms a datetime value into a day value. @@ -478,6 +531,21 @@ def __repr__(self) -> str: """Return the string representation of the DayTransform class.""" return "DayTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + import pyarrow as pa + import pyarrow.compute as pc + + if isinstance(source, DateType): + epoch = datetime.EPOCH_DATE + elif isinstance(source, TimestampType): + epoch = datetime.EPOCH_TIMESTAMP + elif isinstance(source, TimestamptzType): + epoch = datetime.EPOCH_TIMESTAMPTZ + else: + raise ValueError(f"Cannot apply day transform for type: {source}") + + return lambda v: pc.days_between(pa.scalar(epoch), v) if v is not None else None + class HourTransform(TimeTransform[S]): """Transforms a datetime value into a hour value. 
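The `pyarrow_transform` hooks added to the time transforms return a callable over Arrow arrays whose output matches Iceberg's partition values (elapsed years, months, days, or hours since the epoch), which is what the new partitioned write path in `pyiceberg/table/__init__.py` relies on. A minimal sketch of calling one directly; the sample data is illustrative:

```python
from datetime import datetime

import pyarrow as pa

from pyiceberg.transforms import DayTransform
from pyiceberg.types import TimestampType

arr = pa.array(
    [datetime(2024, 5, 1, 12, 0), datetime(2024, 5, 2, 3, 30)],
    type=pa.timestamp("us"),
)

# pyarrow_transform returns a Callable[[pa.Array], pa.Array]; for DayTransform the
# result is the number of days since 1970-01-01 (e.g. 19844 for 2024-05-01).
to_days = DayTransform().pyarrow_transform(TimestampType())
print(to_days(arr))
```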
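Further up, the `add_files` shorthand in `pyiceberg/table/__init__.py` now forwards `snapshot_properties` to the fast-append snapshot, mirroring `append` and `overwrite`. A small usage sketch; the catalog, table identifier, and file path are placeholders:

```python
from pyiceberg.catalog import load_catalog

catalog = load_catalog("default")
table = catalog.load_table("db.events")

# Registers existing Parquet files as data files without rewriting them; the extra
# properties land in the summary of the snapshot created by the fast append.
table.add_files(
    file_paths=["s3://warehouse/staging/part-00000.parquet"],
    snapshot_properties={"ingested-by": "backfill-job"},
)
```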
@@ -515,6 +583,19 @@ def __repr__(self) -> str: """Return the string representation of the HourTransform class.""" return "HourTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + import pyarrow as pa + import pyarrow.compute as pc + + if isinstance(source, TimestampType): + epoch = datetime.EPOCH_TIMESTAMP + elif isinstance(source, TimestamptzType): + epoch = datetime.EPOCH_TIMESTAMPTZ + else: + raise ValueError(f"Cannot apply hour transform for type: {source}") + + return lambda v: pc.hours_between(pa.scalar(epoch), v) if v is not None else None + def _base64encode(buffer: bytes) -> str: """Convert bytes to base64 string.""" @@ -585,6 +666,13 @@ def __repr__(self) -> str: """Return the string representation of the IdentityTransform class.""" return "IdentityTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + return lambda v: v + + @property + def supports_pyarrow_transform(self) -> bool: + return True + class TruncateTransform(Transform[S, S]): """A transform for truncating a value to a specified width. @@ -725,6 +813,9 @@ def __repr__(self) -> str: """Return the string representation of the TruncateTransform class.""" return f"TruncateTransform(width={self._width})" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + raise NotImplementedError() + @singledispatch def _human_string(value: Any, _type: IcebergType) -> str: @@ -807,6 +898,9 @@ def __repr__(self) -> str: """Return the string representation of the UnknownTransform class.""" return f"UnknownTransform(transform={repr(self._transform)})" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + raise NotImplementedError() + class VoidTransform(Transform[S, None], Singleton): """A transform that always returns None.""" @@ -835,6 +929,9 @@ def __repr__(self) -> str: """Return the string representation of the VoidTransform class.""" return "VoidTransform()" + def pyarrow_transform(self, source: IcebergType) -> "Callable[[pa.Array], pa.Array]": + raise NotImplementedError() + def _truncate_number( name: str, pred: BoundLiteralPredicate[L], transform: Callable[[Optional[L]], Optional[L]] diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py index 6ccf9526ba..2ff123148b 100644 --- a/pyiceberg/typedef.py +++ b/pyiceberg/typedef.py @@ -25,7 +25,6 @@ Callable, Dict, Generic, - List, Literal, Optional, Protocol, @@ -53,7 +52,7 @@ def update(self, *args: Any, **kwargs: Any) -> None: raise AttributeError("FrozenDict does not support .update()") -UTF8 = 'utf-8' +UTF8 = "utf-8" EMPTY_DICT = FrozenDict() @@ -198,9 +197,9 @@ def __repr__(self) -> str: """Return the string representation of the Record class.""" return f"{self.__class__.__name__}[{', '.join(f'{key}={repr(value)}' for key, value in self.__dict__.items() if not key.startswith('_'))}]" - def record_fields(self) -> List[str]: - """Return values of all the fields of the Record class except those specified in skip_fields.""" - return [self.__getattribute__(v) if hasattr(self, v) else None for v in self._position_to_field_name] + def __len__(self) -> int: + """Return the number of fields in the Record class.""" + return len(self._position_to_field_name) def __hash__(self) -> int: """Return hash value of the Record class.""" diff --git a/pyiceberg/utils/config.py b/pyiceberg/utils/config.py index 8b1b81d3a7..5eb9cfaa66 100644 --- a/pyiceberg/utils/config.py +++ b/pyiceberg/utils/config.py @@ -127,7 +127,7 @@ def 
set_property(_config: RecursiveDict, path: List[str], config_value: str) -> if env_var_lower.startswith(PYICEBERG.lower()): key = env_var_lower[len(PYICEBERG) :] parts = key.split("__", maxsplit=2) - parts_normalized = [part.replace('__', '.').replace("_", "-") for part in parts] + parts_normalized = [part.replace("__", ".").replace("_", "-") for part in parts] set_property(config, parts_normalized, config_value) return config diff --git a/pylintrc b/pylintrc deleted file mode 100644 index 9835535209..0000000000 --- a/pylintrc +++ /dev/null @@ -1,565 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -extension-pkg-allow-list= - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. (This is an alternative name to extension-pkg-allow-list -# for backward compatibility.) -extension-pkg-whitelist= - -# Return non-zero exit code if any of these messages/categories are detected, -# even if score is above --fail-under value. Syntax same as enable. Messages -# specified are enabled, while categories only check already-enabled messages. -fail-on= - -# Specify a score threshold to be exceeded before program exits with error. -fail-under=10.0 - -# Files or directories to be skipped. They should be base names, not paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the ignore-list. The -# regex matches against paths and can be in Posix or Windows format. -ignore-paths= - -# Files or directories matching the regex patterns are skipped. The regex -# matches against base names, not paths. The default value ignores emacs file -# locks -ignore-patterns=^\.# - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the -# number of processors available to use. -jobs=1 - -# Control the amount of potential inferred values when inferring a single -# object. This can help the performance when dealing with large functions or -# complex, nested conditions. -limit-inference-results=100 - -# List of plugins (as comma separated values of python module names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# Minimum Python version to use for version dependent checks. Will default to -# the version used to run pylint. -py-version=3.9 - -# Discover python modules and packages in the file system subtree. -recursive=no - -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=yes - -# Allow loading of arbitrary C extensions. 
Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, -# UNDEFINED. -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once). You can also use "--disable=all" to -# disable everything first and then re-enable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use "--disable=all --enable=classes -# --disable=W". -disable=all - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable=W - - -[REPORTS] - -# Python expression which should return a score less than or equal to 10. You -# have access to the variables 'fatal', 'error', 'warning', 'refactor', -# 'convention', and 'info' which contain the number of messages in each -# category, as well as 'statement' which is the total number of statements -# analyzed. This score is used by the global evaluation report (RP0004). -evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details. -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages. -reports=no - -# Activate the evaluation score. -score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. -never-returning-functions=sys.exit,argparse.parse_error - - -[LOGGING] - -# The type of string formatting that logging methods do. `old` means using % -# formatting, `new` is for `{}` formatting. -logging-format-style=old - -# Logging modules to check that the string format arguments are in logging -# function parameter format. -logging-modules=logging - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME, - XXX, - TODO - -# Regular expression of note tags to take in consideration. -#notes-rgx= - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. 
-contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# class is considered mixin if its name matches the mixin-class-rgx option. -ignore-mixin-members=yes - -# Tells whether to warn about missing members when the owner of the attribute -# is inferred to be None. -ignore-none=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis). It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - -# Regex pattern to define which classes are considered mixins ignore-mixin- -# members is set to 'yes' -mixin-class-rgx=.*[Mm]ixin - -# List of decorators that change the signature of a decorated function. -signature-mutators= - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid defining new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of names allowed to shadow builtins -allowed-redefined-builtins= - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_, - _cb - -# A regular expression matching the name of dummy variables (i.e. expected to -# not be used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore. -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 
-expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=130 - -# Maximum number of lines in a module. -max-module-lines=1000 - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[SIMILARITIES] - -# Comments are removed from the similarity computation -ignore-comments=yes - -# Docstrings are removed from the similarity computation -ignore-docstrings=yes - -# Imports are removed from the similarity computation -ignore-imports=no - -# Signatures are removed from the similarity computation -ignore-signatures=no - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[STRING] - -# This flag controls whether inconsistent-quotes generates a warning when the -# character used as a quote delimiter is used inconsistently within a module. -check-quote-consistency=no - -# This flag controls whether the implicit-str-concat should generate a warning -# on implicit string concatenation in sequences defined over several lines. -check-str-concat-over-line-jumps=no - - -[BASIC] - -# Naming style matching correct argument names. -argument-naming-style=snake_case - -# Regular expression matching correct argument names. Overrides argument- -# naming-style. If left empty, argument names will be checked with the set -# naming style. -#argument-rgx= - -# Naming style matching correct attribute names. -attr-naming-style=snake_case - -# Regular expression matching correct attribute names. Overrides attr-naming- -# style. If left empty, attribute names will be checked with the set naming -# style. -#attr-rgx= - -# Bad variable names which should always be refused, separated by a comma. -bad-names=foo, - bar, - baz, - toto, - tutu, - tata - -# Bad variable names regexes, separated by a comma. If names match any regex, -# they will always be refused -bad-names-rgxs= - -# Naming style matching correct class attribute names. -class-attribute-naming-style=any - -# Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style. If left empty, class attribute names will be checked -# with the set naming style. -#class-attribute-rgx= - -# Naming style matching correct class constant names. -class-const-naming-style=UPPER_CASE - -# Regular expression matching correct class constant names. Overrides class- -# const-naming-style. If left empty, class constant names will be checked with -# the set naming style. -#class-const-rgx= - -# Naming style matching correct class names. -class-naming-style=PascalCase - -# Regular expression matching correct class names. Overrides class-naming- -# style. If left empty, class names will be checked with the set naming style. -#class-rgx= - -# Naming style matching correct constant names. -const-naming-style=UPPER_CASE - -# Regular expression matching correct constant names. Overrides const-naming- -# style. If left empty, constant names will be checked with the set naming -# style. -#const-rgx= - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. 
-docstring-min-length=-1 - -# Naming style matching correct function names. -function-naming-style=snake_case - -# Regular expression matching correct function names. Overrides function- -# naming-style. If left empty, function names will be checked with the set -# naming style. -#function-rgx= - -# Good variable names which should always be accepted, separated by a comma. -good-names=i, - j, - k, - ex, - Run, - _ - -# Good variable names regexes, separated by a comma. If names match any regex, -# they will always be accepted -good-names-rgxs= - -# Include a hint for the correct naming format with invalid-name. -include-naming-hint=no - -# Naming style matching correct inline iteration names. -inlinevar-naming-style=any - -# Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style. If left empty, inline iteration names will be checked -# with the set naming style. -#inlinevar-rgx= - -# Naming style matching correct method names. -method-naming-style=snake_case - -# Regular expression matching correct method names. Overrides method-naming- -# style. If left empty, method names will be checked with the set naming style. -#method-rgx= - -# Naming style matching correct module names. -module-naming-style=snake_case - -# Regular expression matching correct module names. Overrides module-naming- -# style. If left empty, module names will be checked with the set naming style. -#module-rgx= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. -no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -# These decorators are taken in consideration only for invalid-name. -property-classes=abc.abstractproperty - -# Regular expression matching correct type variable names. If left empty, type -# variable names will be checked with the set naming style. -#typevar-rgx= - -# Naming style matching correct variable names. -variable-naming-style=snake_case - -# Regular expression matching correct variable names. Overrides variable- -# naming-style. If left empty, variable names will be checked with the set -# naming style. -#variable-rgx= - - -[CLASSES] - -# Warn about protected attribute access inside special methods -check-protected-access-in-special-methods=no - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp, - __post_init__ - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=cls - - -[IMPORTS] - -# List of modules that can be imported at any level, not just the top level -# one. -allow-any-import-level= - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. 
-analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma. -deprecated-modules= - -# Output a graph (.gv or any supported image format) of external dependencies -# to the given file (report RP0402 must not be disabled). -ext-import-graph= - -# Output a graph (.gv or any supported image format) of all (i.e. internal and -# external) dependencies to the given file (report RP0402 must not be -# disabled). -import-graph= - -# Output a graph (.gv or any supported image format) of internal dependencies -# to the given file (report RP0402 must not be disabled). -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - -# Couples of modules and preferred modules, separated by a comma. -preferred-modules= - - -[DESIGN] - -# List of regular expressions of class ancestor names to ignore when counting -# public methods (see R0903) -exclude-too-few-public-methods= - -# List of qualified class names to ignore when counting class parents (see -# R0901) -ignored-parents= - -# Maximum number of arguments for function / method. -max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in an if statement (see R0916). -max-bool-expr=5 - -# Maximum number of branch for function / method body. -max-branches=12 - -# Maximum number of locals for function / method body. -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body. -max-returns=6 - -# Maximum number of statements in function / method body. -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "BaseException, Exception". 
-overgeneral-exceptions=BaseException,
-                       Exception
diff --git a/pyproject.toml b/pyproject.toml
index 2682e16173..3a928ec47c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -82,10 +82,10 @@ fastavro = "1.9.4"
 coverage = { version = "^7.4.2", extras = ["toml"] }
 requests-mock = "1.12.1"
 moto = { version = "^5.0.2", extras = ["server"] }
-typing-extensions = "4.11.0"
+typing-extensions = "4.12.0"
 pytest-mock = "3.14.0"
 pyspark = "3.5.1"
-cython = "3.0.8"
+cython = "3.0.10"
 deptry = ">=0.14,<0.17"
 docutils = "!=0.21"
diff --git a/ruff.toml b/ruff.toml
index 92fb9a9c80..caaa108c84 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -80,4 +80,4 @@ known-first-party = ["pyiceberg", "tests"]
 section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]

 [format]
-quote-style = "preserve"
+quote-style = "double"
diff --git a/tests/avro/test_file.py b/tests/avro/test_file.py
index 0809f56fea..4df132304c 100644
--- a/tests/avro/test_file.py
+++ b/tests/avro/test_file.py
@@ -173,13 +173,13 @@ def test_write_manifest_entry_with_iceberg_read_with_fastavro_v1() -> None:
     v2_entry = todict(entry)

     # These are not written in V1
-    del v2_entry['data_sequence_number']
-    del v2_entry['file_sequence_number']
-    del v2_entry['data_file']['content']
-    del v2_entry['data_file']['equality_ids']
+    del v2_entry["data_sequence_number"]
+    del v2_entry["file_sequence_number"]
+    del v2_entry["data_file"]["content"]
+    del v2_entry["data_file"]["equality_ids"]

     # Required in V1
-    v2_entry['data_file']['block_size_in_bytes'] = DEFAULT_BLOCK_SIZE
+    v2_entry["data_file"]["block_size_in_bytes"] = DEFAULT_BLOCK_SIZE

     assert v2_entry == fa_entry
diff --git a/tests/catalog/integration_test_dynamodb.py b/tests/catalog/integration_test_dynamodb.py
index 5b9584c69f..05d51bb0ef 100644
--- a/tests/catalog/integration_test_dynamodb.py
+++ b/tests/catalog/integration_test_dynamodb.py
@@ -184,6 +184,12 @@ def test_create_duplicate_namespace(test_catalog: Catalog, database_name: str) -
         test_catalog.create_namespace(database_name)


+def test_create_namespace_if_not_exists(test_catalog: Catalog, database_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_namespace_if_not_exists(database_name)
+    assert (database_name,) in test_catalog.list_namespaces()
+
+
 def test_create_namespace_with_comment_and_location(test_catalog: Catalog, database_name: str) -> None:
     test_location = get_s3_path(get_bucket_name(), database_name)
     test_properties = {
diff --git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py
index a2c430de5f..21c415212a 100644
--- a/tests/catalog/integration_test_glue.py
+++ b/tests/catalog/integration_test_glue.py
@@ -291,6 +291,12 @@ def test_create_duplicate_namespace(test_catalog: Catalog, database_name: str) -
         test_catalog.create_namespace(database_name)


+def test_create_namespace_if_not_exists(test_catalog: Catalog, database_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_namespace_if_not_exists(database_name)
+    assert (database_name,) in test_catalog.list_namespaces()
+
+
 def test_create_namespace_with_comment_and_location(test_catalog: Catalog, database_name: str) -> None:
     test_location = get_s3_path(get_bucket_name(), database_name)
     test_properties = {
@@ -478,7 +484,7 @@ def test_commit_table_properties(
     updated_table_metadata = table.metadata
     assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 1
-    assert updated_table_metadata.properties == {'Description':
'test_description', "test_a": "test_aa", "test_c": "test_c"} + assert updated_table_metadata.properties == {"Description": "test_description", "test_a": "test_aa", "test_c": "test_c"} table_info = glue.get_table( DatabaseName=database_name, @@ -564,3 +570,19 @@ def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, table_ test_catalog.create_namespace(database_name) test_catalog.create_table((database_name, table_name), table_schema_nested) assert test_catalog.table_exists((database_name, table_name)) is True + + +def test_register_table_with_given_location( + test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str +) -> None: + identifier = (database_name, table_name) + new_identifier = (database_name, f"new_{table_name}") + test_catalog.create_namespace(database_name) + tbl = test_catalog.create_table(identifier, table_schema_nested) + location = tbl.metadata_location + test_catalog.drop_table(identifier) # drops the table but keeps the metadata file + assert not test_catalog.table_exists(identifier) + table = test_catalog.register_table(new_identifier, location) + assert table.identifier == (CATALOG_NAME,) + new_identifier + assert table.metadata_location == location + assert test_catalog.table_exists(new_identifier) diff --git a/tests/catalog/test_base.py b/tests/catalog/test_base.py index 7d5e0a973c..06e9a8a3aa 100644 --- a/tests/catalog/test_base.py +++ b/tests/catalog/test_base.py @@ -105,6 +105,7 @@ def create_table( if not location: location = f'{self._warehouse_location}/{"/".join(identifier)}' + location = location.rstrip("/") metadata_location = self._get_metadata_location(location=location) metadata = new_table_metadata( @@ -353,6 +354,19 @@ def test_create_table_location_override(catalog: InMemoryCatalog) -> None: assert table.location() == new_location +def test_create_table_removes_trailing_slash_from_location(catalog: InMemoryCatalog) -> None: + new_location = f"{catalog._warehouse_location}/new_location" + table = catalog.create_table( + identifier=TEST_TABLE_IDENTIFIER, + schema=TEST_TABLE_SCHEMA, + location=f"{new_location}/", + partition_spec=TEST_TABLE_PARTITION_SPEC, + properties=TEST_TABLE_PROPERTIES, + ) + assert catalog.load_table(TEST_TABLE_IDENTIFIER) == table + assert table.location() == new_location + + @pytest.mark.parametrize( "schema,expected", [ diff --git a/tests/catalog/test_dynamodb.py b/tests/catalog/test_dynamodb.py index 1c647cf828..7ad1301d9d 100644 --- a/tests/catalog/test_dynamodb.py +++ b/tests/catalog/test_dynamodb.py @@ -117,6 +117,21 @@ def test_create_table_with_given_location( assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) +@mock_aws +def test_create_table_removes_trailing_slash_in_location( + _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str +) -> None: + catalog_name = "test_ddb_catalog" + identifier = (database_name, table_name) + test_catalog = DynamoDbCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url}) + test_catalog.create_namespace(namespace=database_name) + location = f"s3://{BUCKET_NAME}/{database_name}.db/{table_name}" + table = test_catalog.create_table(identifier=identifier, schema=table_schema_nested, location=f"{location}/") + assert table.identifier == (catalog_name,) + identifier + assert table.location() == location + assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) + + @mock_aws def test_create_table_with_no_location( _bucket_initialize: None, 
table_schema_nested: Schema, database_name: str, table_name: str @@ -554,10 +569,10 @@ def test_passing_provided_profile() -> None: } props = {"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} props.update(session_props) # type: ignore - with mock.patch('boto3.Session', return_value=mock.Mock()) as mock_session: + with mock.patch("boto3.Session", return_value=mock.Mock()) as mock_session: mock_client = mock.Mock() mock_session.return_value.client.return_value = mock_client - mock_client.describe_table.return_value = {'Table': {'TableStatus': 'ACTIVE'}} + mock_client.describe_table.return_value = {"Table": {"TableStatus": "ACTIVE"}} test_catalog = DynamoDbCatalog(catalog_name, **props) assert test_catalog.dynamodb is mock_client mock_session.assert_called_with(**session_props) @@ -575,4 +590,4 @@ def test_table_exists( # Act and Assert for an existing table assert test_catalog.table_exists(identifier) is True # Act and Assert for an non-existing table - assert test_catalog.table_exists(('non', 'exist')) is False + assert test_catalog.table_exists(("non", "exist")) is False diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index 5999b192a2..6b57f1dfe6 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -137,6 +137,22 @@ def test_create_table_with_given_location( assert test_catalog._parse_metadata_version(table.metadata_location) == 0 +@mock_aws +def test_create_table_removes_trailing_slash_in_location( + _bucket_initialize: None, moto_endpoint_url: str, table_schema_nested: Schema, database_name: str, table_name: str +) -> None: + catalog_name = "glue" + identifier = (database_name, table_name) + test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url}) + test_catalog.create_namespace(namespace=database_name) + location = f"s3://{BUCKET_NAME}/{database_name}.db/{table_name}" + table = test_catalog.create_table(identifier=identifier, schema=table_schema_nested, location=f"{location}/") + assert table.identifier == (catalog_name,) + identifier + assert table.location() == location + assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location) + assert test_catalog._parse_metadata_version(table.metadata_location) == 0 + + @mock_aws def test_create_table_with_pyarrow_schema( _bucket_initialize: None, @@ -699,7 +715,7 @@ def test_commit_table_properties( updated_table_metadata = table.metadata assert test_catalog._parse_metadata_version(table.metadata_location) == 1 - assert updated_table_metadata.properties == {'Description': 'test_description', "test_a": "test_aa", "test_c": "test_c"} + assert updated_table_metadata.properties == {"Description": "test_description", "test_a": "test_aa", "test_c": "test_c"} table_info = _glue.get_table( DatabaseName=database_name, @@ -831,4 +847,18 @@ def test_table_exists( # Act and Assert for an existing table assert test_catalog.table_exists(identifier) is True # Act and Assert for a non-existing table - assert test_catalog.table_exists(('non', 'exist')) is False + assert test_catalog.table_exists(("non", "exist")) is False + + +@mock_aws +def test_register_table_with_given_location( + _bucket_initialize: None, moto_endpoint_url: str, metadata_location: str, database_name: str, table_name: str +) -> None: + catalog_name = "glue" + identifier = (database_name, table_name) + location = metadata_location + test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"}) + test_catalog.create_namespace(namespace=database_name, 
properties={"location": f"s3://{BUCKET_NAME}/{database_name}.db"}) + table = test_catalog.register_table(identifier, location) + assert table.identifier == (catalog_name,) + identifier + assert test_catalog.table_exists(identifier) is True diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index 70927ea1bc..96e95815be 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -24,6 +24,8 @@ AlreadyExistsException, FieldSchema, InvalidOperationException, + LockResponse, + LockState, MetaException, NoSuchObjectException, SerDeInfo, @@ -34,12 +36,19 @@ from hive_metastore.ttypes import Table as HiveTable from pyiceberg.catalog import PropertiesUpdateSummary -from pyiceberg.catalog.hive import HiveCatalog, _construct_hive_storage_descriptor +from pyiceberg.catalog.hive import ( + LOCK_CHECK_MAX_WAIT_TIME, + LOCK_CHECK_MIN_WAIT_TIME, + LOCK_CHECK_RETRIES, + HiveCatalog, + _construct_hive_storage_descriptor, +) from pyiceberg.exceptions import ( NamespaceAlreadyExistsError, NamespaceNotEmptyError, NoSuchNamespaceError, NoSuchTableError, + WaitingForLockException, ) from pyiceberg.partitioning import PartitionField, PartitionSpec from pyiceberg.schema import Schema @@ -225,27 +234,27 @@ def test_create_table( retention=None, sd=StorageDescriptor( cols=[ - FieldSchema(name='boolean', type='boolean', comment=None), - FieldSchema(name='integer', type='int', comment=None), - FieldSchema(name='long', type='bigint', comment=None), - FieldSchema(name='float', type='float', comment=None), - FieldSchema(name='double', type='double', comment=None), - FieldSchema(name='decimal', type='decimal(32,3)', comment=None), - FieldSchema(name='date', type='date', comment=None), - FieldSchema(name='time', type='string', comment=None), - FieldSchema(name='timestamp', type='timestamp', comment=None), + FieldSchema(name="boolean", type="boolean", comment=None), + FieldSchema(name="integer", type="int", comment=None), + FieldSchema(name="long", type="bigint", comment=None), + FieldSchema(name="float", type="float", comment=None), + FieldSchema(name="double", type="double", comment=None), + FieldSchema(name="decimal", type="decimal(32,3)", comment=None), + FieldSchema(name="date", type="date", comment=None), + FieldSchema(name="time", type="string", comment=None), + FieldSchema(name="timestamp", type="timestamp", comment=None), FieldSchema( - name='timestamptz', - type='timestamp' if hive2_compatible else 'timestamp with local time zone', + name="timestamptz", + type="timestamp" if hive2_compatible else "timestamp with local time zone", comment=None, ), - FieldSchema(name='string', type='string', comment=None), - FieldSchema(name='uuid', type='string', comment=None), - FieldSchema(name='fixed', type='binary', comment=None), - FieldSchema(name='binary', type='binary', comment=None), - FieldSchema(name='list', type='array', comment=None), - FieldSchema(name='map', type='map', comment=None), - FieldSchema(name='struct', type='struct', comment=None), + FieldSchema(name="string", type="string", comment=None), + FieldSchema(name="uuid", type="string", comment=None), + FieldSchema(name="fixed", type="binary", comment=None), + FieldSchema(name="binary", type="binary", comment=None), + FieldSchema(name="list", type="array", comment=None), + FieldSchema(name="map", type="map", comment=None), + FieldSchema(name="struct", type="struct", comment=None), ], location=f"{hive_database.locationUri}/table", inputFormat="org.apache.hadoop.mapred.FileInputFormat", @@ -305,40 +314,40 @@ def test_create_table( 
last_column_id=22, schemas=[ Schema( - NestedField(field_id=1, name='boolean', field_type=BooleanType(), required=True), - NestedField(field_id=2, name='integer', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='long', field_type=LongType(), required=True), - NestedField(field_id=4, name='float', field_type=FloatType(), required=True), - NestedField(field_id=5, name='double', field_type=DoubleType(), required=True), - NestedField(field_id=6, name='decimal', field_type=DecimalType(precision=32, scale=3), required=True), - NestedField(field_id=7, name='date', field_type=DateType(), required=True), - NestedField(field_id=8, name='time', field_type=TimeType(), required=True), - NestedField(field_id=9, name='timestamp', field_type=TimestampType(), required=True), - NestedField(field_id=10, name='timestamptz', field_type=TimestamptzType(), required=True), - NestedField(field_id=11, name='string', field_type=StringType(), required=True), - NestedField(field_id=12, name='uuid', field_type=UUIDType(), required=True), - NestedField(field_id=13, name='fixed', field_type=FixedType(length=12), required=True), - NestedField(field_id=14, name='binary', field_type=BinaryType(), required=True), + NestedField(field_id=1, name="boolean", field_type=BooleanType(), required=True), + NestedField(field_id=2, name="integer", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="long", field_type=LongType(), required=True), + NestedField(field_id=4, name="float", field_type=FloatType(), required=True), + NestedField(field_id=5, name="double", field_type=DoubleType(), required=True), + NestedField(field_id=6, name="decimal", field_type=DecimalType(precision=32, scale=3), required=True), + NestedField(field_id=7, name="date", field_type=DateType(), required=True), + NestedField(field_id=8, name="time", field_type=TimeType(), required=True), + NestedField(field_id=9, name="timestamp", field_type=TimestampType(), required=True), + NestedField(field_id=10, name="timestamptz", field_type=TimestamptzType(), required=True), + NestedField(field_id=11, name="string", field_type=StringType(), required=True), + NestedField(field_id=12, name="uuid", field_type=UUIDType(), required=True), + NestedField(field_id=13, name="fixed", field_type=FixedType(length=12), required=True), + NestedField(field_id=14, name="binary", field_type=BinaryType(), required=True), NestedField( field_id=15, - name='list', - field_type=ListType(type='list', element_id=18, element_type=StringType(), element_required=True), + name="list", + field_type=ListType(type="list", element_id=18, element_type=StringType(), element_required=True), required=True, ), NestedField( field_id=16, - name='map', + name="map", field_type=MapType( - type='map', key_id=19, key_type=StringType(), value_id=20, value_type=IntegerType(), value_required=True + type="map", key_id=19, key_type=StringType(), value_id=20, value_type=IntegerType(), value_required=True ), required=True, ), NestedField( field_id=17, - name='struct', + name="struct", field_type=StructType( - NestedField(field_id=21, name='inner_string', field_type=StringType(), required=False), - NestedField(field_id=22, name='inner_int', field_type=IntegerType(), required=True), + NestedField(field_id=21, name="inner_string", field_type=StringType(), required=False), + NestedField(field_id=22, name="inner_int", field_type=IntegerType(), required=True), ), required=False, ), @@ -348,7 +357,182 @@ def test_create_table( ], current_schema_id=0, last_partition_id=999, - 
properties={"owner": "javaberg", 'write.parquet.compression-codec': 'zstd'}, + properties={"owner": "javaberg", "write.parquet.compression-codec": "zstd"}, + partition_specs=[PartitionSpec()], + default_spec_id=0, + current_snapshot_id=None, + snapshots=[], + snapshot_log=[], + metadata_log=[], + sort_orders=[SortOrder(order_id=0)], + default_sort_order_id=0, + refs={}, + format_version=2, + last_sequence_number=0, + ) + + assert metadata.model_dump() == expected.model_dump() + + +@pytest.mark.parametrize("hive2_compatible", [True, False]) +@patch("time.time", MagicMock(return_value=12345)) +def test_create_table_with_given_location_removes_trailing_slash( + table_schema_with_all_types: Schema, hive_database: HiveDatabase, hive_table: HiveTable, hive2_compatible: bool +) -> None: + catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL) + if hive2_compatible: + catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL, **{"hive.hive2-compatible": "true"}) + + location = f"{hive_database.locationUri}/table-given-location" + + catalog._client = MagicMock() + catalog._client.__enter__().create_table.return_value = None + catalog._client.__enter__().get_table.return_value = hive_table + catalog._client.__enter__().get_database.return_value = hive_database + catalog.create_table( + ("default", "table"), schema=table_schema_with_all_types, properties={"owner": "javaberg"}, location=f"{location}/" + ) + + called_hive_table: HiveTable = catalog._client.__enter__().create_table.call_args[0][0] + # This one is generated within the function itself, so we need to extract + # it to construct the assert_called_with + metadata_location: str = called_hive_table.parameters["metadata_location"] + assert metadata_location.endswith(".metadata.json") + assert "/database/table-given-location/metadata/" in metadata_location + catalog._client.__enter__().create_table.assert_called_with( + HiveTable( + tableName="table", + dbName="default", + owner="javaberg", + createTime=12345, + lastAccessTime=12345, + retention=None, + sd=StorageDescriptor( + cols=[ + FieldSchema(name="boolean", type="boolean", comment=None), + FieldSchema(name="integer", type="int", comment=None), + FieldSchema(name="long", type="bigint", comment=None), + FieldSchema(name="float", type="float", comment=None), + FieldSchema(name="double", type="double", comment=None), + FieldSchema(name="decimal", type="decimal(32,3)", comment=None), + FieldSchema(name="date", type="date", comment=None), + FieldSchema(name="time", type="string", comment=None), + FieldSchema(name="timestamp", type="timestamp", comment=None), + FieldSchema( + name="timestamptz", + type="timestamp" if hive2_compatible else "timestamp with local time zone", + comment=None, + ), + FieldSchema(name="string", type="string", comment=None), + FieldSchema(name="uuid", type="string", comment=None), + FieldSchema(name="fixed", type="binary", comment=None), + FieldSchema(name="binary", type="binary", comment=None), + FieldSchema(name="list", type="array", comment=None), + FieldSchema(name="map", type="map", comment=None), + FieldSchema(name="struct", type="struct", comment=None), + ], + location=f"{hive_database.locationUri}/table-given-location", + inputFormat="org.apache.hadoop.mapred.FileInputFormat", + outputFormat="org.apache.hadoop.mapred.FileOutputFormat", + compressed=None, + numBuckets=None, + serdeInfo=SerDeInfo( + name=None, + serializationLib="org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", + parameters=None, + description=None, + 
serializerClass=None, + deserializerClass=None, + serdeType=None, + ), + bucketCols=None, + sortCols=None, + parameters=None, + skewedInfo=None, + storedAsSubDirectories=None, + ), + partitionKeys=None, + parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location}, + viewOriginalText=None, + viewExpandedText=None, + tableType="EXTERNAL_TABLE", + privileges=None, + temporary=False, + rewriteEnabled=None, + creationMetadata=None, + catName=None, + ownerType=1, + writeId=-1, + isStatsCompliant=None, + colStats=None, + accessType=None, + requiredReadCapabilities=None, + requiredWriteCapabilities=None, + id=None, + fileMetadata=None, + dictionary=None, + txnId=None, + ) + ) + + with open(metadata_location, encoding=UTF8) as f: + payload = f.read() + + metadata = TableMetadataUtil.parse_raw(payload) + + assert "database/table-given-location" in metadata.location + + expected = TableMetadataV2( + location=metadata.location, + table_uuid=metadata.table_uuid, + last_updated_ms=metadata.last_updated_ms, + last_column_id=22, + schemas=[ + Schema( + NestedField(field_id=1, name="boolean", field_type=BooleanType(), required=True), + NestedField(field_id=2, name="integer", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="long", field_type=LongType(), required=True), + NestedField(field_id=4, name="float", field_type=FloatType(), required=True), + NestedField(field_id=5, name="double", field_type=DoubleType(), required=True), + NestedField(field_id=6, name="decimal", field_type=DecimalType(precision=32, scale=3), required=True), + NestedField(field_id=7, name="date", field_type=DateType(), required=True), + NestedField(field_id=8, name="time", field_type=TimeType(), required=True), + NestedField(field_id=9, name="timestamp", field_type=TimestampType(), required=True), + NestedField(field_id=10, name="timestamptz", field_type=TimestamptzType(), required=True), + NestedField(field_id=11, name="string", field_type=StringType(), required=True), + NestedField(field_id=12, name="uuid", field_type=UUIDType(), required=True), + NestedField(field_id=13, name="fixed", field_type=FixedType(length=12), required=True), + NestedField(field_id=14, name="binary", field_type=BinaryType(), required=True), + NestedField( + field_id=15, + name="list", + field_type=ListType(type="list", element_id=18, element_type=StringType(), element_required=True), + required=True, + ), + NestedField( + field_id=16, + name="map", + field_type=MapType( + type="map", key_id=19, key_type=StringType(), value_id=20, value_type=IntegerType(), value_required=True + ), + required=True, + ), + NestedField( + field_id=17, + name="struct", + field_type=StructType( + NestedField(field_id=21, name="inner_string", field_type=StringType(), required=False), + NestedField(field_id=22, name="inner_int", field_type=IntegerType(), required=True), + ), + required=False, + ), + schema_id=0, + identifier_field_ids=[2], + ) + ], + current_schema_id=0, + last_partition_id=999, + properties={"owner": "javaberg", "write.parquet.compression-codec": "zstd"}, partition_specs=[PartitionSpec()], default_spec_id=0, current_snapshot_id=None, @@ -983,3 +1167,31 @@ def test_resolve_table_location_warehouse(hive_database: HiveDatabase) -> None: location = catalog._resolve_table_location(None, "database", "table") assert location == "/tmp/warehouse/database.db/table" + + +def test_hive_wait_for_lock() -> None: + lockid = 12345 + acquired = LockResponse(lockid=lockid, state=LockState.ACQUIRED) + waiting = 
LockResponse(lockid=lockid, state=LockState.WAITING) + prop = { + "uri": HIVE_METASTORE_FAKE_URL, + LOCK_CHECK_MIN_WAIT_TIME: 0.1, + LOCK_CHECK_MAX_WAIT_TIME: 0.5, + LOCK_CHECK_RETRIES: 5, + } + catalog = HiveCatalog(HIVE_CATALOG_NAME, **prop) # type: ignore + catalog._client = MagicMock() + catalog._client.lock.return_value = LockResponse(lockid=lockid, state=LockState.WAITING) + + # lock will be acquired after 3 retries + catalog._client.check_lock.side_effect = [waiting if i < 2 else acquired for i in range(10)] + response: LockResponse = catalog._wait_for_lock("db", "tbl", lockid, catalog._client) + assert response.state == LockState.ACQUIRED + assert catalog._client.check_lock.call_count == 3 + + # lock wait should exit with WaitingForLockException finally after enough retries + catalog._client.check_lock.side_effect = [waiting for _ in range(10)] + catalog._client.check_lock.call_count = 0 + with pytest.raises(WaitingForLockException): + catalog._wait_for_lock("db", "tbl", lockid, catalog._client) + assert catalog._client.check_lock.call_count == 5 diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 15ddb01b25..b5c626d6f0 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -500,6 +500,24 @@ def test_create_namespace_200(rest_mock: Mocker) -> None: RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).create_namespace(namespace) +def test_create_namespace_if_exists_409(rest_mock: Mocker) -> None: + namespace = "examples" + rest_mock.post( + f"{TEST_URI}v1/namespaces", + json={ + "error": { + "message": "Namespace already exists: fokko in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e", + "type": "AlreadyExistsException", + "code": 409, + } + }, + status_code=409, + request_headers=TEST_HEADERS, + ) + + RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).create_namespace_if_not_exists(namespace) + + def test_create_namespace_409(rest_mock: Mocker) -> None: namespace = "examples" rest_mock.post( @@ -673,6 +691,16 @@ def test_table_exist_200(rest_mock: Mocker) -> None: assert catalog.table_exists(("fokko", "table")) +def test_table_exist_204(rest_mock: Mocker) -> None: + rest_mock.head( + f"{TEST_URI}v1/namespaces/fokko/tables/table", + status_code=204, + request_headers=TEST_HEADERS, + ) + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN) + assert catalog.table_exists(("fokko", "table")) + + def test_table_exist_500(rest_mock: Mocker) -> None: rest_mock.head( f"{TEST_URI}v1/namespaces/fokko/tables/table", @@ -732,6 +760,31 @@ def test_create_table_200( assert actual == expected +def test_create_table_with_given_location_removes_trailing_slash_200( + rest_mock: Mocker, table_schema_simple: Schema, example_table_metadata_no_snapshot_v1_rest_json: Dict[str, Any] +) -> None: + rest_mock.post( + f"{TEST_URI}v1/namespaces/fokko/tables", + json=example_table_metadata_no_snapshot_v1_rest_json, + status_code=200, + request_headers=TEST_HEADERS, + ) + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN) + location = "s3://warehouse/database/table-custom-location" + catalog.create_table( + identifier=("fokko", "fokko2"), + schema=table_schema_simple, + location=f"{location}/", + partition_spec=PartitionSpec( + PartitionField(source_id=1, field_id=1000, transform=TruncateTransform(width=3), name="id"), spec_id=1 + ), + sort_order=SortOrder(SortField(source_id=2, transform=IdentityTransform())), + properties={"owner": "fokko"}, + ) + assert rest_mock.last_request + assert rest_mock.last_request.json()["location"] == location + + def 
test_create_table_409(rest_mock: Mocker, table_schema_simple: Schema) -> None: rest_mock.post( f"{TEST_URI}v1/namespaces/fokko/tables", diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py index 40a1566e2f..545916223a 100644 --- a/tests/catalog/test_sql.py +++ b/tests/catalog/test_sql.py @@ -17,7 +17,7 @@ import os from pathlib import Path -from typing import Generator, List +from typing import Any, Generator, List import pyarrow as pa import pytest @@ -25,6 +25,9 @@ from pytest_lazyfixture import lazy_fixture from sqlalchemy.exc import ArgumentError, IntegrityError +from pyiceberg.catalog import ( + Catalog, +) from pyiceberg.catalog.sql import SqlCatalog from pyiceberg.exceptions import ( CommitFailedException, @@ -52,51 +55,90 @@ from pyiceberg.types import IntegerType -@pytest.fixture(name="random_identifier") -def fixture_random_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: +@pytest.fixture(scope="module") +def catalog_name() -> str: + return "test_sql_catalog" + + +@pytest.fixture(name="random_table_identifier") +def fixture_random_table_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) return database_name, table_name -@pytest.fixture(name="another_random_identifier") -def fixture_another_random_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: +@pytest.fixture(name="random_table_identifier_with_catalog") +def fixture_random_table_identifier_with_catalog( + warehouse: Path, catalog_name: str, database_name: str, table_name: str +) -> Identifier: + os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) + return catalog_name, database_name, table_name + + +@pytest.fixture(name="another_random_table_identifier") +def fixture_another_random_table_identifier(warehouse: Path, database_name: str, table_name: str) -> Identifier: database_name = database_name + "_new" table_name = table_name + "_new" os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) return database_name, table_name +@pytest.fixture(name="another_random_table_identifier_with_catalog") +def fixture_another_random_table_identifier_with_catalog( + warehouse: Path, catalog_name: str, database_name: str, table_name: str +) -> Identifier: + database_name = database_name + "_new" + table_name = table_name + "_new" + os.makedirs(f"{warehouse}/{database_name}.db/{table_name}/metadata/", exist_ok=True) + return catalog_name, database_name, table_name + + +@pytest.fixture(name="random_hierarchical_identifier") +def fixture_random_hierarchical_identifier(warehouse: Path, hierarchical_namespace_name: str, table_name: str) -> Identifier: + os.makedirs(f"{warehouse}/{hierarchical_namespace_name}.db/{table_name}/metadata/", exist_ok=True) + return Catalog.identifier_to_tuple(".".join((hierarchical_namespace_name, table_name))) + + +@pytest.fixture(name="another_random_hierarchical_identifier") +def fixture_another_random_hierarchical_identifier( + warehouse: Path, hierarchical_namespace_name: str, table_name: str +) -> Identifier: + hierarchical_namespace_name = hierarchical_namespace_name + "_new" + table_name = table_name + "_new" + os.makedirs(f"{warehouse}/{hierarchical_namespace_name}.db/{table_name}/metadata/", exist_ok=True) + return Catalog.identifier_to_tuple(".".join((hierarchical_namespace_name, table_name))) + + @pytest.fixture(scope="module") -def catalog_memory(warehouse: 
Path) -> Generator[SqlCatalog, None, None]: +def catalog_memory(catalog_name: str, warehouse: Path) -> Generator[SqlCatalog, None, None]: props = { "uri": "sqlite:///:memory:", "warehouse": f"file://{warehouse}", } - catalog = SqlCatalog("test_sql_catalog", **props) + catalog = SqlCatalog(catalog_name, **props) catalog.create_tables() yield catalog catalog.destroy_tables() @pytest.fixture(scope="module") -def catalog_sqlite(warehouse: Path) -> Generator[SqlCatalog, None, None]: +def catalog_sqlite(catalog_name: str, warehouse: Path) -> Generator[SqlCatalog, None, None]: props = { "uri": f"sqlite:////{warehouse}/sql-catalog.db", "warehouse": f"file://{warehouse}", } - catalog = SqlCatalog("test_sql_catalog", **props) + catalog = SqlCatalog(catalog_name, **props) catalog.create_tables() yield catalog catalog.destroy_tables() @pytest.fixture(scope="module") -def catalog_sqlite_without_rowcount(warehouse: Path) -> Generator[SqlCatalog, None, None]: +def catalog_sqlite_without_rowcount(catalog_name: str, warehouse: Path) -> Generator[SqlCatalog, None, None]: props = { "uri": f"sqlite:////{warehouse}/sql-catalog.db", "warehouse": f"file://{warehouse}", } - catalog = SqlCatalog("test_sql_catalog", **props) + catalog = SqlCatalog(catalog_name, **props) catalog.engine.dialect.supports_sane_rowcount = False catalog.create_tables() yield catalog @@ -104,33 +146,33 @@ def catalog_sqlite_without_rowcount(warehouse: Path) -> Generator[SqlCatalog, No @pytest.fixture(scope="module") -def catalog_sqlite_fsspec(warehouse: Path) -> Generator[SqlCatalog, None, None]: +def catalog_sqlite_fsspec(catalog_name: str, warehouse: Path) -> Generator[SqlCatalog, None, None]: props = { "uri": f"sqlite:////{warehouse}/sql-catalog.db", "warehouse": f"file://{warehouse}", PY_IO_IMPL: FSSPEC_FILE_IO, } - catalog = SqlCatalog("test_sql_catalog", **props) + catalog = SqlCatalog(catalog_name, **props) catalog.create_tables() yield catalog catalog.destroy_tables() -def test_creation_with_no_uri() -> None: +def test_creation_with_no_uri(catalog_name: str) -> None: with pytest.raises(NoSuchPropertyException): - SqlCatalog("test_ddb_catalog", not_uri="unused") + SqlCatalog(catalog_name, not_uri="unused") -def test_creation_with_unsupported_uri() -> None: +def test_creation_with_unsupported_uri(catalog_name: str) -> None: with pytest.raises(ArgumentError): - SqlCatalog("test_ddb_catalog", uri="unsupported:xxx") + SqlCatalog(catalog_name, uri="unsupported:xxx") @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_create_tables_idempotency(catalog: SqlCatalog) -> None: @@ -140,67 +182,102 @@ def test_create_tables_idempotency(catalog: SqlCatalog) -> None: @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_create_table_default_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def 
test_create_table_default_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) assert table.sort_order().order_id == 0, "Order ID must match" assert table.sort_order().is_unsorted is True, "Order must be unsorted" - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_create_v1_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested, properties={"format-version": "1"}) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_create_v1_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested, properties={"format-version": "1"}) assert table.sort_order().order_id == 0, "Order ID must match" assert table.sort_order().is_unsorted is True, "Order must be unsorted" assert table.format_version == 1 assert table.spec() == UNPARTITIONED_PARTITION_SPEC - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + ], +) +@pytest.mark.parametrize( + "table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), ], ) def test_create_table_with_pyarrow_schema( catalog: SqlCatalog, pyarrow_schema_simple_without_ids: pa.Schema, iceberg_table_schema_simple: Schema, - random_identifier: Identifier, + table_identifier: Identifier, ) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, pyarrow_schema_simple_without_ids) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, pyarrow_schema_simple_without_ids) assert table.schema() == iceberg_table_schema_simple - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_write_pyarrow_schema(catalog: SqlCatalog, random_identifier: Identifier) -> None: 
+@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_write_pyarrow_schema(catalog: SqlCatalog, table_identifier: Identifier) -> None: import pyarrow as pa pyarrow_table = pa.Table.from_arrays( @@ -211,96 +288,165 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, random_identifier: Identifier pa.array([None, "A", "B", "C"]), # 'large' column ], schema=pa.schema([ - pa.field('foo', pa.string(), nullable=True), - pa.field('bar', pa.int32(), nullable=False), - pa.field('baz', pa.bool_(), nullable=True), - pa.field('large', pa.large_string(), nullable=True), + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("large", pa.large_string(), nullable=True), ]), ) - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, pyarrow_table.schema) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, pyarrow_table.schema) table.overwrite(pyarrow_table) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_create_table_custom_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_create_table_custom_sort_order(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) order = SortOrder(SortField(source_id=2, transform=IdentityTransform(), null_order=NullOrder.NULLS_FIRST)) - table = catalog.create_table(random_identifier, table_schema_nested, sort_order=order) + table = catalog.create_table(table_identifier, table_schema_nested, sort_order=order) given_sort_order = table.sort_order() assert given_sort_order.order_id == 1, "Order ID must match" assert len(given_sort_order.fields) == 1, "Order must have 1 field" assert given_sort_order.fields[0].direction == SortDirection.ASC, "Direction must match" assert given_sort_order.fields[0].null_order == NullOrder.NULLS_FIRST, "Null order must match" assert isinstance(given_sort_order.fields[0].transform, IdentityTransform), "Transform must match" - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + ], +) +@pytest.mark.parametrize( + "table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + 
lazy_fixture("random_table_identifier_with_catalog"), ], ) def test_create_table_with_default_warehouse_location( - warehouse: Path, catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier + warehouse: Path, catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier ) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - catalog.create_table(random_identifier, table_schema_nested) - table = catalog.load_table(random_identifier) - assert table.identifier == (catalog.name,) + random_identifier + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + catalog.create_table(table_identifier, table_schema_nested) + table = catalog.load_table(table_identifier) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog assert table.metadata_location.startswith(f"file://{warehouse}") assert os.path.exists(table.metadata_location[len("file://") :]) - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_create_duplicated_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - catalog.create_table(random_identifier, table_schema_nested) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_create_table_with_given_location_removes_trailing_slash( + warehouse: Path, catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier +) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + table_name = Catalog.table_name_from(table_identifier_nocatalog) + location = f"file://{warehouse}/{catalog.name}.db/{table_name}-given" + catalog.create_namespace(namespace) + catalog.create_table(table_identifier, table_schema_nested, location=f"{location}/") + table = catalog.load_table(table_identifier) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog + assert table.metadata_location.startswith(f"file://{warehouse}") + assert os.path.exists(table.metadata_location[len("file://") :]) + assert table.location() == location + catalog.drop_table(table_identifier) + + +@pytest.mark.parametrize( + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + ], +) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_create_duplicated_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + catalog.create_table(table_identifier, table_schema_nested) with pytest.raises(TableAlreadyExistsError): - 
catalog.create_table(random_identifier, table_schema_nested) + catalog.create_table(table_identifier, table_schema_nested) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + ], +) +@pytest.mark.parametrize( + "table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), ], ) def test_create_table_if_not_exists_duplicated_table( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier ) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table1 = catalog.create_table(random_identifier, table_schema_nested) - table2 = catalog.create_table_if_not_exists(random_identifier, table_schema_nested) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table1 = catalog.create_table(table_identifier, table_schema_nested) + table2 = catalog.create_table_if_not_exists(table_identifier, table_schema_nested) assert table1.identifier == table2.identifier @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_create_table_with_non_existing_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: @@ -310,54 +456,72 @@ def test_create_table_with_non_existing_namespace(catalog: SqlCatalog, table_sch @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_create_table_without_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_name: str) -> None: - with pytest.raises(ValueError): + with pytest.raises(NoSuchNamespaceError): catalog.create_table(table_name, table_schema_nested) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_register_table(catalog: SqlCatalog, random_identifier: Identifier, metadata_location: str) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.register_table(random_identifier, metadata_location) - assert table.identifier == (catalog.name,) + random_identifier +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_register_table(catalog: SqlCatalog, table_identifier: Identifier, metadata_location: str) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.register_table(table_identifier, metadata_location) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog assert table.metadata_location == metadata_location assert os.path.exists(metadata_location) 
- catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_register_existing_table(catalog: SqlCatalog, random_identifier: Identifier, metadata_location: str) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - catalog.register_table(random_identifier, metadata_location) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_register_existing_table(catalog: SqlCatalog, table_identifier: Identifier, metadata_location: str) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + catalog.register_table(table_identifier, metadata_location) with pytest.raises(TableAlreadyExistsError): - catalog.register_table(random_identifier, metadata_location) + catalog.register_table(table_identifier, metadata_location) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_register_table_with_non_existing_namespace(catalog: SqlCatalog, metadata_location: str, table_name: str) -> None: @@ -367,10 +531,10 @@ def test_register_table_with_non_existing_namespace(catalog: SqlCatalog, metadat @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_register_table_without_namespace(catalog: SqlCatalog, metadata_location: str, table_name: str) -> None: @@ -379,35 +543,53 @@ def test_register_table_without_namespace(catalog: SqlCatalog, metadata_location @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_load_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - loaded_table = catalog.load_table(random_identifier) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_load_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) + loaded_table = catalog.load_table(table_identifier) assert table.identifier == loaded_table.identifier assert table.metadata_location == loaded_table.metadata_location assert table.metadata == loaded_table.metadata @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - 
lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_load_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - intermediate = catalog.load_table(random_identifier) - assert intermediate.identifier == (catalog.name,) + random_identifier +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_load_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) + intermediate = catalog.load_table(table_identifier) + assert intermediate.identifier == (catalog.name,) + table_identifier_nocatalog loaded_table = catalog.load_table(intermediate.identifier) assert table.identifier == loaded_table.identifier assert table.metadata_location == loaded_table.metadata_location @@ -415,286 +597,467 @@ def test_load_table_from_self_identifier(catalog: SqlCatalog, table_schema_neste @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) -def test_drop_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier - catalog.drop_table(random_identifier) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_drop_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog + catalog.drop_table(table_identifier) with pytest.raises(NoSuchTableError): - catalog.load_table(random_identifier) + catalog.load_table(table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) -def test_drop_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - 
catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_drop_table_from_self_identifier(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + table_identifier_nocatalog catalog.drop_table(table.identifier) with pytest.raises(NoSuchTableError): catalog.load_table(table.identifier) with pytest.raises(NoSuchTableError): - catalog.load_table(random_identifier) + catalog.load_table(table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize( + "table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), ], ) -def test_drop_table_that_does_not_exist(catalog: SqlCatalog, random_identifier: Identifier) -> None: +def test_drop_table_that_does_not_exist(catalog: SqlCatalog, table_identifier: Identifier) -> None: with pytest.raises(NoSuchTableError): - catalog.drop_table(random_identifier) + catalog.drop_table(table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), ], ) def test_rename_table( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, from_table_identifier: Identifier, to_table_identifier: Identifier ) -> None: - from_database_name, _from_table_name = random_identifier - to_database_name, _to_table_name = another_random_identifier - catalog.create_namespace(from_database_name) - catalog.create_namespace(to_database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier - catalog.rename_table(random_identifier, another_random_identifier) - new_table = catalog.load_table(another_random_identifier) - assert new_table.identifier == (catalog.name,) + another_random_identifier + from_table_identifier_nocatalog = 
catalog.identifier_to_tuple_without_catalog(from_table_identifier) + to_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(to_table_identifier) + from_namespace = Catalog.namespace_from(from_table_identifier_nocatalog) + to_namespace = Catalog.namespace_from(to_table_identifier_nocatalog) + catalog.create_namespace(from_namespace) + catalog.create_namespace(to_namespace) + table = catalog.create_table(from_table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + from_table_identifier_nocatalog + catalog.rename_table(from_table_identifier, to_table_identifier) + new_table = catalog.load_table(to_table_identifier) + assert new_table.identifier == (catalog.name,) + to_table_identifier_nocatalog assert new_table.metadata_location == table.metadata_location with pytest.raises(NoSuchTableError): - catalog.load_table(random_identifier) + catalog.load_table(from_table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), ], ) def test_rename_table_from_self_identifier( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, from_table_identifier: Identifier, to_table_identifier: Identifier ) -> None: - from_database_name, _from_table_name = random_identifier - to_database_name, _to_table_name = another_random_identifier - catalog.create_namespace(from_database_name) - catalog.create_namespace(to_database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier - catalog.rename_table(table.identifier, another_random_identifier) - new_table = catalog.load_table(another_random_identifier) - assert new_table.identifier == (catalog.name,) + another_random_identifier + from_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(from_table_identifier) + to_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(to_table_identifier) + from_namespace = Catalog.namespace_from(from_table_identifier_nocatalog) + to_namespace = Catalog.namespace_from(to_table_identifier_nocatalog) + catalog.create_namespace(from_namespace) + catalog.create_namespace(to_namespace) + table = catalog.create_table(from_table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + from_table_identifier_nocatalog + catalog.rename_table(table.identifier, to_table_identifier) + new_table = catalog.load_table(to_table_identifier) + assert new_table.identifier == (catalog.name,) + to_table_identifier_nocatalog assert new_table.metadata_location == table.metadata_location with pytest.raises(NoSuchTableError): catalog.load_table(table.identifier) with pytest.raises(NoSuchTableError): - catalog.load_table(random_identifier) + 
catalog.load_table(from_table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), ], ) def test_rename_table_to_existing_one( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, from_table_identifier: Identifier, to_table_identifier: Identifier ) -> None: - from_database_name, _from_table_name = random_identifier - to_database_name, _to_table_name = another_random_identifier - catalog.create_namespace(from_database_name) - catalog.create_namespace(to_database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier - new_table = catalog.create_table(another_random_identifier, table_schema_nested) - assert new_table.identifier == (catalog.name,) + another_random_identifier + from_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(from_table_identifier) + to_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(to_table_identifier) + from_namespace = Catalog.namespace_from(from_table_identifier_nocatalog) + to_namespace = Catalog.namespace_from(to_table_identifier_nocatalog) + catalog.create_namespace(from_namespace) + catalog.create_namespace(to_namespace) + table = catalog.create_table(from_table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + from_table_identifier_nocatalog + new_table = catalog.create_table(to_table_identifier, table_schema_nested) + assert new_table.identifier == (catalog.name,) + to_table_identifier_nocatalog with pytest.raises(TableAlreadyExistsError): - catalog.rename_table(random_identifier, another_random_identifier) + catalog.rename_table(from_table_identifier, to_table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), ], ) -def test_rename_missing_table(catalog: SqlCatalog, random_identifier: Identifier, another_random_identifier: Identifier) -> None: - to_database_name, _to_table_name = another_random_identifier - catalog.create_namespace(to_database_name) +def test_rename_missing_table(catalog: 
SqlCatalog, from_table_identifier: Identifier, to_table_identifier: Identifier) -> None: + to_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(to_table_identifier) + to_namespace = Catalog.namespace_from(to_table_identifier_nocatalog) + catalog.create_namespace(to_namespace) with pytest.raises(NoSuchTableError): - catalog.rename_table(random_identifier, another_random_identifier) + catalog.rename_table(from_table_identifier, to_table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize( + "from_table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "to_table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), ], ) def test_rename_table_to_missing_namespace( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, from_table_identifier: Identifier, to_table_identifier: Identifier ) -> None: - from_database_name, _from_table_name = random_identifier - catalog.create_namespace(from_database_name) - table = catalog.create_table(random_identifier, table_schema_nested) - assert table.identifier == (catalog.name,) + random_identifier + from_table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(from_table_identifier) + from_namespace = Catalog.namespace_from(from_table_identifier_nocatalog) + catalog.create_namespace(from_namespace) + table = catalog.create_table(from_table_identifier, table_schema_nested) + assert table.identifier == (catalog.name,) + from_table_identifier_nocatalog with pytest.raises(NoSuchNamespaceError): - catalog.rename_table(random_identifier, another_random_identifier) + catalog.rename_table(from_table_identifier, to_table_identifier) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + ], +) +@pytest.mark.parametrize( + "table_identifier_1", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +@pytest.mark.parametrize( + "table_identifier_2", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("another_random_table_identifier"), + lazy_fixture("another_random_hierarchical_identifier"), + lazy_fixture("another_random_table_identifier_with_catalog"), ], ) def test_list_tables( - catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier, another_random_identifier: Identifier + catalog: SqlCatalog, table_schema_nested: Schema, table_identifier_1: Identifier, table_identifier_2: Identifier ) -> None: - database_name_1, _table_name_1 = random_identifier - database_name_2, _table_name_2 = another_random_identifier - catalog.create_namespace(database_name_1) - catalog.create_namespace(database_name_2) - catalog.create_table(random_identifier, table_schema_nested) - catalog.create_table(another_random_identifier, 
table_schema_nested) - identifier_list = catalog.list_tables(database_name_1) + table_identifier_1_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier_1) + table_identifier_2_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier_2) + namespace_1 = Catalog.namespace_from(table_identifier_1_nocatalog) + namespace_2 = Catalog.namespace_from(table_identifier_2_nocatalog) + catalog.create_namespace(namespace_1) + catalog.create_namespace(namespace_2) + catalog.create_table(table_identifier_1, table_schema_nested) + catalog.create_table(table_identifier_2, table_schema_nested) + identifier_list = catalog.list_tables(namespace_1) assert len(identifier_list) == 1 - assert random_identifier in identifier_list + assert table_identifier_1_nocatalog in identifier_list - identifier_list = catalog.list_tables(database_name_2) + identifier_list = catalog.list_tables(namespace_2) assert len(identifier_list) == 1 - assert another_random_identifier in identifier_list + assert table_identifier_2_nocatalog in identifier_list + + +@pytest.mark.parametrize( + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + ], +) +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_list_tables_when_missing_namespace(catalog: SqlCatalog, namespace: str) -> None: + with pytest.raises(NoSuchNamespaceError): + catalog.list_tables(namespace) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_create_namespace(catalog: SqlCatalog, database_name: str) -> None: +def test_create_namespace_if_not_exists(catalog: SqlCatalog, database_name: str) -> None: catalog.create_namespace(database_name) assert (database_name,) in catalog.list_namespaces() + catalog.create_namespace_if_not_exists(database_name) + assert (database_name,) in catalog.list_namespaces() @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_create_duplicate_namespace(catalog: SqlCatalog, database_name: str) -> None: - catalog.create_namespace(database_name) +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_create_namespace(catalog: SqlCatalog, namespace: str) -> None: + catalog.create_namespace(namespace) + assert (Catalog.identifier_to_tuple(namespace)) in catalog.list_namespaces() + + +@pytest.mark.parametrize( + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + ], +) +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_create_duplicate_namespace(catalog: SqlCatalog, namespace: str) -> None: + catalog.create_namespace(namespace) with pytest.raises(NamespaceAlreadyExistsError): - catalog.create_namespace(database_name) + catalog.create_namespace(namespace) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_create_namespaces_sharing_same_prefix(catalog: SqlCatalog, database_name: str) -> None: - catalog.create_namespace(database_name + "_1") +@pytest.mark.parametrize("namespace", 
[lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_create_namespaces_sharing_same_prefix(catalog: SqlCatalog, namespace: str) -> None: + catalog.create_namespace(namespace + "_1") # Second namespace is a prefix of the first one, make sure it can be added. - catalog.create_namespace(database_name) + catalog.create_namespace(namespace) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_create_namespace_with_comment_and_location(catalog: SqlCatalog, database_name: str) -> None: +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_create_namespace_with_comment_and_location(catalog: SqlCatalog, namespace: str) -> None: test_location = "/test/location" test_properties = { "comment": "this is a test description", "location": test_location, } - catalog.create_namespace(namespace=database_name, properties=test_properties) + catalog.create_namespace(namespace=namespace, properties=test_properties) loaded_database_list = catalog.list_namespaces() - assert (database_name,) in loaded_database_list - properties = catalog.load_namespace_properties(database_name) + assert Catalog.identifier_to_tuple(namespace) in loaded_database_list + properties = catalog.load_namespace_properties(namespace) assert properties["comment"] == "this is a test description" assert properties["location"] == test_location @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) @pytest.mark.filterwarnings("ignore") -def test_create_namespace_with_null_properties(catalog: SqlCatalog, database_name: str) -> None: +def test_create_namespace_with_null_properties(catalog: SqlCatalog, namespace: str) -> None: with pytest.raises(IntegrityError): - catalog.create_namespace(namespace=database_name, properties={None: "value"}) # type: ignore + catalog.create_namespace(namespace=namespace, properties={None: "value"}) # type: ignore with pytest.raises(IntegrityError): - catalog.create_namespace(namespace=database_name, properties={"key": None}) + catalog.create_namespace(namespace=namespace, properties={"key": None}) + + +@pytest.mark.parametrize( + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + ], +) +@pytest.mark.parametrize("empty_namespace", ["", (), (""), ("", ""), " ", (" ")]) +def test_create_namespace_with_empty_identifier(catalog: SqlCatalog, empty_namespace: Any) -> None: + with pytest.raises(NoSuchNamespaceError): + catalog.create_namespace(empty_namespace) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_list_namespaces(catalog: SqlCatalog, database_list: List[str]) -> None: - for database_name in database_list: - catalog.create_namespace(database_name) - db_list = catalog.list_namespaces() - for database_name in database_list: - assert (database_name,) in db_list - assert len(catalog.list_namespaces(database_name)) == 1 +@pytest.mark.parametrize("namespace_list", [lazy_fixture("database_list"), 
lazy_fixture("hierarchical_namespace_list")]) +def test_list_namespaces(catalog: SqlCatalog, namespace_list: List[str]) -> None: + for namespace in namespace_list: + catalog.create_namespace(namespace) + # Test global list + ns_list = catalog.list_namespaces() + for namespace in namespace_list: + assert Catalog.identifier_to_tuple(namespace) in ns_list + # Test individual namespace list + assert len(one_namespace := catalog.list_namespaces(namespace)) == 1 + assert Catalog.identifier_to_tuple(namespace) == one_namespace[0] @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_list_non_existing_namespaces(catalog: SqlCatalog) -> None: @@ -703,66 +1066,77 @@ def test_list_non_existing_namespaces(catalog: SqlCatalog) -> None: @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, table_name = random_identifier - catalog.create_namespace(database_name) - assert (database_name,) in catalog.list_namespaces() - catalog.create_table((database_name, table_name), table_schema_nested) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + assert namespace in catalog.list_namespaces() + catalog.create_table(table_identifier, table_schema_nested) with pytest.raises(NamespaceNotEmptyError): - catalog.drop_namespace(database_name) - catalog.drop_table((database_name, table_name)) - catalog.drop_namespace(database_name) - assert (database_name,) not in catalog.list_namespaces() + catalog.drop_namespace(namespace) + catalog.drop_table(table_identifier) + catalog.drop_namespace(namespace) + assert namespace not in catalog.list_namespaces() @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_load_namespace_properties(catalog: SqlCatalog, database_name: str) -> None: +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_load_namespace_properties(catalog: SqlCatalog, namespace: str) -> None: warehouse_location = "/test/location" test_properties = { "comment": "this is a test description", - "location": f"{warehouse_location}/{database_name}.db", + "location": f"{warehouse_location}/{namespace}.db", "test_property1": "1", "test_property2": "2", "test_property3": "3", } - catalog.create_namespace(database_name, test_properties) - listed_properties = catalog.load_namespace_properties(database_name) + catalog.create_namespace(namespace, test_properties) + listed_properties = catalog.load_namespace_properties(namespace) for k, v in listed_properties.items(): assert k in test_properties assert v == test_properties[k] 
@pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_load_empty_namespace_properties(catalog: SqlCatalog, database_name: str) -> None: - catalog.create_namespace(database_name) - listed_properties = catalog.load_namespace_properties(database_name) +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_load_empty_namespace_properties(catalog: SqlCatalog, namespace: str) -> None: + catalog.create_namespace(namespace) + listed_properties = catalog.load_namespace_properties(namespace) assert listed_properties == {"exists": "true"} @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) def test_load_namespace_properties_non_existing_namespace(catalog: SqlCatalog) -> None: @@ -771,25 +1145,26 @@ def test_load_namespace_properties_non_existing_namespace(catalog: SqlCatalog) - @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_update_namespace_properties(catalog: SqlCatalog, database_name: str) -> None: +@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")]) +def test_update_namespace_properties(catalog: SqlCatalog, namespace: str) -> None: warehouse_location = "/test/location" test_properties = { "comment": "this is a test description", - "location": f"{warehouse_location}/{database_name}.db", + "location": f"{warehouse_location}/{namespace}.db", "test_property1": "1", "test_property2": "2", "test_property3": "3", } removals = {"test_property1", "test_property2", "test_property3", "should_not_removed"} updates = {"test_property4": "4", "test_property5": "5", "comment": "updated test description"} - catalog.create_namespace(database_name, test_properties) - update_report = catalog.update_namespace_properties(database_name, removals, updates) + catalog.create_namespace(namespace, test_properties) + update_report = catalog.update_namespace_properties(namespace, removals, updates) for k in updates.keys(): assert k in update_report.updated for k in removals: @@ -797,21 +1172,30 @@ def test_update_namespace_properties(catalog: SqlCatalog, database_name: str) -> assert k in update_report.missing else: assert k in update_report.removed - assert "updated test description" == catalog.load_namespace_properties(database_name)["comment"] + assert "updated test description" == catalog.load_namespace_properties(namespace)["comment"] @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) -def test_commit_table(catalog: SqlCatalog, table_schema_nested: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_nested) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + 
lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_commit_table(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_nested) assert catalog._parse_metadata_version(table.metadata_location) == 0 assert table.metadata.current_schema_id == 0 @@ -834,18 +1218,27 @@ def test_commit_table(catalog: SqlCatalog, table_schema_nested: Schema, random_i @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), - lazy_fixture('catalog_sqlite_fsspec'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + lazy_fixture("catalog_sqlite_fsspec"), ], ) -def test_append_table(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table = catalog.create_table(random_identifier, table_schema_simple) +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_append_table(catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table = catalog.create_table(table_identifier, table_schema_simple) df = pa.Table.from_pydict( { @@ -865,28 +1258,37 @@ def test_append_table(catalog: SqlCatalog, table_schema_simple: Schema, random_i assert table.metadata.snapshots[0].sequence_number == 1 assert table.metadata.snapshots[0].summary is not None assert table.metadata.snapshots[0].summary.operation == Operation.APPEND - assert table.metadata.snapshots[0].summary['added-data-files'] == '1' - assert table.metadata.snapshots[0].summary['added-records'] == '1' - assert table.metadata.snapshots[0].summary['total-data-files'] == '1' - assert table.metadata.snapshots[0].summary['total-records'] == '1' + assert table.metadata.snapshots[0].summary["added-data-files"] == "1" + assert table.metadata.snapshots[0].summary["added-records"] == "1" + assert table.metadata.snapshots[0].summary["total-data-files"] == "1" + assert table.metadata.snapshots[0].summary["total-records"] == "1" # read back the data assert df == table.scan().to_arrow() @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) -def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - table_a = catalog.create_table(random_identifier, table_schema_simple) - table_b = catalog.load_table(random_identifier) +@pytest.mark.parametrize( + "table_identifier", + [ + 
lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + table_a = catalog.create_table(table_identifier, table_schema_simple) + table_b = catalog.load_table(table_identifier) with table_a.update_schema() as update: update.add_column(path="b", field_type=IntegerType()) @@ -898,11 +1300,11 @@ def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schem @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), ], ) @pytest.mark.parametrize("format_version", [1, 2]) @@ -921,7 +1323,7 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: pa_table = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], + "foo": ["a", None, "z"], }, schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), ) @@ -930,8 +1332,8 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: pa_table_with_column = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], - 'bar': [19, None, 25], + "foo": ["a", None, "z"], + "bar": [19, None, 25], }, schema=pa.schema([ pa.field("foo", pa.string(), nullable=True), @@ -949,55 +1351,142 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None: @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_create_table_transaction(catalog: SqlCatalog, format_version: int) -> None: + identifier = f"default.arrow_create_table_transaction_{catalog.name}_{format_version}" + try: + catalog.create_namespace("default") + except NamespaceAlreadyExistsError: + pass + + try: + catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + pa_table = pa.Table.from_pydict( + { + "foo": ["a", None, "z"], + }, + schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), + ) + + pa_table_with_column = pa.Table.from_pydict( + { + "foo": ["a", None, "z"], + "bar": [19, None, 25], + }, + schema=pa.schema([ + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + ]), + ) + + with catalog.create_table_transaction( + identifier=identifier, schema=pa_table.schema, properties={"format-version": str(format_version)} + ) as txn: + with txn.update_snapshot().fast_append() as snapshot_update: + for data_file in _dataframe_to_data_files(table_metadata=txn.table_metadata, df=pa_table, io=txn._table.io): + snapshot_update.append_data_file(data_file) + + with txn.update_schema() as schema_txn: + schema_txn.union_by_name(pa_table_with_column.schema) + + with txn.update_snapshot().fast_append() as snapshot_update: + for data_file in _dataframe_to_data_files( + table_metadata=txn.table_metadata, df=pa_table_with_column, io=txn._table.io + ): + snapshot_update.append_data_file(data_file) + + tbl = catalog.load_table(identifier=identifier) + 
assert tbl.format_version == format_version + assert len(tbl.scan().to_arrow()) == 6 + + +@pytest.mark.parametrize( + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize( + "table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), ], ) -def test_table_properties_int_value(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: +def test_table_properties_int_value(catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier) -> None: # table properties can be set to int, but still serialized to string - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) property_with_int = {"property_name": 42} - table = catalog.create_table(random_identifier, table_schema_simple, properties=property_with_int) + table = catalog.create_table(table_identifier, table_schema_simple, properties=property_with_int) assert isinstance(table.properties["property_name"], str) @pytest.mark.parametrize( - 'catalog', + "catalog", + [ + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), + lazy_fixture("catalog_sqlite_without_rowcount"), + ], +) +@pytest.mark.parametrize( + "table_identifier", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), - lazy_fixture('catalog_sqlite_without_rowcount'), + lazy_fixture("random_table_identifier"), + lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), ], ) def test_table_properties_raise_for_none_value( - catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier + catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier ) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) property_with_none = {"property_name": None} with pytest.raises(ValidationError) as exc_info: - _ = catalog.create_table(random_identifier, table_schema_simple, properties=property_with_none) + _ = catalog.create_table(table_identifier, table_schema_simple, properties=property_with_none) assert "None type is not a supported value in properties: property_name" in str(exc_info.value) @pytest.mark.parametrize( - 'catalog', + "catalog", [ - lazy_fixture('catalog_memory'), - lazy_fixture('catalog_sqlite'), + lazy_fixture("catalog_memory"), + lazy_fixture("catalog_sqlite"), ], ) -def test_table_exists(catalog: SqlCatalog, table_schema_simple: Schema, random_identifier: Identifier) -> None: - database_name, _table_name = random_identifier - catalog.create_namespace(database_name) - catalog.create_table(random_identifier, table_schema_simple, properties={"format-version": "2"}) - existing_table = random_identifier +@pytest.mark.parametrize( + "table_identifier", + [ + lazy_fixture("random_table_identifier"), + 
lazy_fixture("random_hierarchical_identifier"), + lazy_fixture("random_table_identifier_with_catalog"), + ], +) +def test_table_exists(catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier) -> None: + table_identifier_nocatalog = catalog.identifier_to_tuple_without_catalog(table_identifier) + namespace = Catalog.namespace_from(table_identifier_nocatalog) + catalog.create_namespace(namespace) + catalog.create_table(table_identifier, table_schema_simple, properties={"format-version": "2"}) + existing_table = table_identifier # Act and Assert for an existing table assert catalog.table_exists(existing_table) is True # Act and Assert for a non-existing table - assert catalog.table_exists(('non', 'exist')) is False + assert catalog.table_exists(("non", "exist")) is False diff --git a/tests/conftest.py b/tests/conftest.py index 6679543694..d3f23689a2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -324,9 +324,9 @@ def pyarrow_schema_simple_without_ids() -> "pa.Schema": import pyarrow as pa return pa.schema([ - pa.field('foo', pa.string(), nullable=True), - pa.field('bar', pa.int32(), nullable=False), - pa.field('baz', pa.bool_(), nullable=True), + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), ]) @@ -335,12 +335,12 @@ def pyarrow_schema_nested_without_ids() -> "pa.Schema": import pyarrow as pa return pa.schema([ - pa.field('foo', pa.string(), nullable=False), - pa.field('bar', pa.int32(), nullable=False), - pa.field('baz', pa.bool_(), nullable=True), - pa.field('qux', pa.list_(pa.string()), nullable=False), + pa.field("foo", pa.string(), nullable=False), + pa.field("bar", pa.int32(), nullable=False), + pa.field("baz", pa.bool_(), nullable=True), + pa.field("qux", pa.list_(pa.string()), nullable=False), pa.field( - 'quux', + "quux", pa.map_( pa.string(), pa.map_(pa.string(), pa.int32()), @@ -348,20 +348,20 @@ def pyarrow_schema_nested_without_ids() -> "pa.Schema": nullable=False, ), pa.field( - 'location', + "location", pa.list_( pa.struct([ - pa.field('latitude', pa.float32(), nullable=False), - pa.field('longitude', pa.float32(), nullable=False), + pa.field("latitude", pa.float32(), nullable=False), + pa.field("longitude", pa.float32(), nullable=False), ]), ), nullable=False, ), pa.field( - 'person', + "person", pa.struct([ - pa.field('name', pa.string(), nullable=True), - pa.field('age', pa.int32(), nullable=False), + pa.field("name", pa.string(), nullable=True), + pa.field("age", pa.int32(), nullable=False), ]), nullable=True, ), @@ -1878,6 +1878,19 @@ def database_list(database_name: str) -> List[str]: return [f"{database_name}_{idx}" for idx in range(NUM_TABLES)] +@pytest.fixture() +def hierarchical_namespace_name() -> str: + prefix = "my_iceberg_ns-" + random_tag1 = "".join(choice(string.ascii_letters) for _ in range(RANDOM_LENGTH)) + random_tag2 = "".join(choice(string.ascii_letters) for _ in range(RANDOM_LENGTH)) + return ".".join([prefix + random_tag1, prefix + random_tag2]).lower() + + +@pytest.fixture() +def hierarchical_namespace_list(hierarchical_namespace_name: str) -> List[str]: + return [f"{hierarchical_namespace_name}_{idx}" for idx in range(NUM_TABLES)] + + BUCKET_NAME = "test_bucket" TABLE_METADATA_LOCATION_REGEX = re.compile( r"""s3://test_bucket/my_iceberg_database-[a-z]{20}.db/ @@ -2068,31 +2081,31 @@ def spark() -> "SparkSession": TEST_DATA_WITH_NULL = { - 'bool': [False, None, True], - 'string': ['a', None, 'z'], + "bool": [False, None, True], + 
"string": ["a", None, "z"], # Go over the 16 bytes to kick in truncation - 'string_long': ['a' * 22, None, 'z' * 22], - 'int': [1, None, 9], - 'long': [1, None, 9], - 'float': [0.0, None, 0.9], - 'double': [0.0, None, 0.9], + "string_long": ["a" * 22, None, "z" * 22], + "int": [1, None, 9], + "long": [1, None, 9], + "float": [0.0, None, 0.9], + "double": [0.0, None, 0.9], # 'time': [1_000_000, None, 3_000_000], # Example times: 1s, none, and 3s past midnight #Spark does not support time fields - 'timestamp': [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)], - 'timestamptz': [ + "timestamp": [datetime(2023, 1, 1, 19, 25, 00), None, datetime(2023, 3, 1, 19, 25, 00)], + "timestamptz": [ datetime(2023, 1, 1, 19, 25, 00, tzinfo=timezone.utc), None, datetime(2023, 3, 1, 19, 25, 00, tzinfo=timezone.utc), ], - 'date': [date(2023, 1, 1), None, date(2023, 3, 1)], + "date": [date(2023, 1, 1), None, date(2023, 3, 1)], # Not supported by Spark # 'time': [time(1, 22, 0), None, time(19, 25, 0)], # Not natively supported by Arrow # 'uuid': [uuid.UUID('00000000-0000-0000-0000-000000000000').bytes, None, uuid.UUID('11111111-1111-1111-1111-111111111111').bytes], - 'binary': [b'\01', None, b'\22'], - 'fixed': [ - uuid.UUID('00000000-0000-0000-0000-000000000000').bytes, + "binary": [b"\01", None, b"\22"], + "fixed": [ + uuid.UUID("00000000-0000-0000-0000-000000000000").bytes, None, - uuid.UUID('11111111-1111-1111-1111-111111111111').bytes, + uuid.UUID("11111111-1111-1111-1111-111111111111").bytes, ], } @@ -2145,3 +2158,46 @@ def arrow_table_with_only_nulls(pa_schema: "pa.Schema") -> "pa.Table": import pyarrow as pa return pa.Table.from_pylist([{}, {}], schema=pa_schema) + + +@pytest.fixture(scope="session") +def arrow_table_date_timestamps() -> "pa.Table": + """Pyarrow table with only date, timestamp and timestamptz values.""" + import pyarrow as pa + + return pa.Table.from_pydict( + { + "date": [date(2023, 12, 31), date(2024, 1, 1), date(2024, 1, 31), date(2024, 2, 1), date(2024, 2, 1), None], + "timestamp": [ + datetime(2023, 12, 31, 0, 0, 0), + datetime(2024, 1, 1, 0, 0, 0), + datetime(2024, 1, 31, 0, 0, 0), + datetime(2024, 2, 1, 0, 0, 0), + datetime(2024, 2, 1, 6, 0, 0), + None, + ], + "timestamptz": [ + datetime(2023, 12, 31, 0, 0, 0, tzinfo=timezone.utc), + datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc), + datetime(2024, 1, 31, 0, 0, 0, tzinfo=timezone.utc), + datetime(2024, 2, 1, 0, 0, 0, tzinfo=timezone.utc), + datetime(2024, 2, 1, 6, 0, 0, tzinfo=timezone.utc), + None, + ], + }, + schema=pa.schema([ + ("date", pa.date32()), + ("timestamp", pa.timestamp(unit="us")), + ("timestamptz", pa.timestamp(unit="us", tz="UTC")), + ]), + ) + + +@pytest.fixture(scope="session") +def arrow_table_date_timestamps_schema() -> Schema: + """Pyarrow table Schema with only date, timestamp and timestamptz values.""" + return Schema( + NestedField(field_id=1, name="date", field_type=DateType(), required=False), + NestedField(field_id=2, name="timestamp", field_type=TimestampType(), required=False), + NestedField(field_id=3, name="timestamptz", field_type=TimestamptzType(), required=False), + ) diff --git a/tests/expressions/test_expressions.py b/tests/expressions/test_expressions.py index f277672d87..87856a04f6 100644 --- a/tests/expressions/test_expressions.py +++ b/tests/expressions/test_expressions.py @@ -1152,11 +1152,11 @@ def test_above_long_bounds_greater_than_or_equal( def test_eq_bound_expression(bound_reference_str: BoundReference[str]) -> None: - assert 
BoundEqualTo(term=bound_reference_str, literal=literal('a')) != BoundGreaterThanOrEqual( - term=bound_reference_str, literal=literal('a') + assert BoundEqualTo(term=bound_reference_str, literal=literal("a")) != BoundGreaterThanOrEqual( + term=bound_reference_str, literal=literal("a") ) - assert BoundEqualTo(term=bound_reference_str, literal=literal('a')) == BoundEqualTo( - term=bound_reference_str, literal=literal('a') + assert BoundEqualTo(term=bound_reference_str, literal=literal("a")) == BoundEqualTo( + term=bound_reference_str, literal=literal("a") ) diff --git a/tests/integration/test_add_files.py b/tests/integration/test_add_files.py index 0de5d5f4ce..84729fcca4 100644 --- a/tests/integration/test_add_files.py +++ b/tests/integration/test_add_files.py @@ -17,7 +17,7 @@ # pylint:disable=redefined-outer-name from datetime import date -from typing import Optional +from typing import Iterator, Optional import pyarrow as pa import pyarrow.parquet as pq @@ -65,10 +65,10 @@ ) ARROW_SCHEMA_WITH_IDS = pa.schema([ - pa.field('foo', pa.bool_(), nullable=False, metadata={"PARQUET:field_id": "1"}), - pa.field('bar', pa.string(), nullable=False, metadata={"PARQUET:field_id": "2"}), - pa.field('baz', pa.int32(), nullable=False, metadata={"PARQUET:field_id": "3"}), - pa.field('qux', pa.date32(), nullable=False, metadata={"PARQUET:field_id": "4"}), + pa.field("foo", pa.bool_(), nullable=False, metadata={"PARQUET:field_id": "1"}), + pa.field("bar", pa.string(), nullable=False, metadata={"PARQUET:field_id": "2"}), + pa.field("baz", pa.int32(), nullable=False, metadata={"PARQUET:field_id": "3"}), + pa.field("qux", pa.date32(), nullable=False, metadata={"PARQUET:field_id": "4"}), ]) @@ -122,8 +122,13 @@ def _create_table( return tbl +@pytest.fixture(name="format_version", params=[pytest.param(1, id="format_version=1"), pytest.param(2, id="format_version=2")]) +def format_version_fixure(request: pytest.FixtureRequest) -> Iterator[int]: + """Fixture to run tests with different table format versions.""" + yield request.param + + @pytest.mark.integration -@pytest.mark.parametrize("format_version", [1, 2]) def test_add_files_to_unpartitioned_table(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None: identifier = f"default.unpartitioned_table_v{format_version}" tbl = _create_table(session_catalog, identifier, format_version) @@ -163,7 +168,6 @@ def test_add_files_to_unpartitioned_table(spark: SparkSession, session_catalog: @pytest.mark.integration -@pytest.mark.parametrize("format_version", [1, 2]) def test_add_files_to_unpartitioned_table_raises_file_not_found( spark: SparkSession, session_catalog: Catalog, format_version: int ) -> None: @@ -184,7 +188,6 @@ def test_add_files_to_unpartitioned_table_raises_file_not_found( @pytest.mark.integration -@pytest.mark.parametrize("format_version", [1, 2]) def test_add_files_to_unpartitioned_table_raises_has_field_ids( spark: SparkSession, session_catalog: Catalog, format_version: int ) -> None: @@ -205,7 +208,6 @@ def test_add_files_to_unpartitioned_table_raises_has_field_ids( @pytest.mark.integration -@pytest.mark.parametrize("format_version", [1, 2]) def test_add_files_to_unpartitioned_table_with_schema_updates( spark: SparkSession, session_catalog: Catalog, format_version: int ) -> None: @@ -263,7 +265,6 @@ def test_add_files_to_unpartitioned_table_with_schema_updates( @pytest.mark.integration -@pytest.mark.parametrize("format_version", [1, 2]) def test_add_files_to_partitioned_table(spark: SparkSession, session_catalog: Catalog, 
format_version: int) -> None: identifier = f"default.partitioned_table_v{format_version}" @@ -335,7 +336,6 @@ def test_add_files_to_partitioned_table(spark: SparkSession, session_catalog: Ca @pytest.mark.integration -@pytest.mark.parametrize("format_version", [1, 2]) def test_add_files_to_bucket_partitioned_table_fails(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None: identifier = f"default.partitioned_table_bucket_fails_v{format_version}" @@ -378,7 +378,6 @@ def test_add_files_to_bucket_partitioned_table_fails(spark: SparkSession, sessio @pytest.mark.integration -@pytest.mark.parametrize("format_version", [1, 2]) def test_add_files_to_partitioned_table_fails_with_lower_and_upper_mismatch( spark: SparkSession, session_catalog: Catalog, format_version: int ) -> None: @@ -424,3 +423,28 @@ def test_add_files_to_partitioned_table_fails_with_lower_and_upper_mismatch( "Cannot infer partition value from parquet metadata as there are more than one partition values for Partition Field: baz. lower_value=123, upper_value=124" in str(exc_info.value) ) + + +@pytest.mark.integration +def test_add_files_snapshot_properties(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None: + identifier = f"default.unpartitioned_table_v{format_version}" + tbl = _create_table(session_catalog, identifier, format_version) + + file_paths = [f"s3://warehouse/default/unpartitioned/v{format_version}/test-{i}.parquet" for i in range(5)] + # write parquet files + for file_path in file_paths: + fo = tbl.io.new_output(file_path) + with fo.create(overwrite=True) as fos: + with pq.ParquetWriter(fos, schema=ARROW_SCHEMA) as writer: + writer.write_table(ARROW_TABLE) + + # add the parquet files as data files + tbl.add_files(file_paths=file_paths, snapshot_properties={"snapshot_prop_a": "test_prop_a"}) + + # NameMapping must have been set to enable reads + assert tbl.name_mapping() is not None + + summary = spark.sql(f"SELECT * FROM {identifier}.snapshots;").collect()[0].summary + + assert "snapshot_prop_a" in summary + assert summary["snapshot_prop_a"] == "test_prop_a" diff --git a/tests/integration/test_inspect_table.py b/tests/integration/test_inspect_table.py index a884f9d4c0..1f2b9a3ead 100644 --- a/tests/integration/test_inspect_table.py +++ b/tests/integration/test_inspect_table.py @@ -88,45 +88,45 @@ def test_inspect_snapshots( df = tbl.inspect.snapshots() assert df.column_names == [ - 'committed_at', - 'snapshot_id', - 'parent_id', - 'operation', - 'manifest_list', - 'summary', + "committed_at", + "snapshot_id", + "parent_id", + "operation", + "manifest_list", + "summary", ] - for committed_at in df['committed_at']: + for committed_at in df["committed_at"]: assert isinstance(committed_at.as_py(), datetime) - for snapshot_id in df['snapshot_id']: + for snapshot_id in df["snapshot_id"]: assert isinstance(snapshot_id.as_py(), int) - assert df['parent_id'][0].as_py() is None - assert df['parent_id'][1:] == df['snapshot_id'][:2] + assert df["parent_id"][0].as_py() is None + assert df["parent_id"][1:] == df["snapshot_id"][:2] - assert [operation.as_py() for operation in df['operation']] == ['append', 'overwrite', 'append'] + assert [operation.as_py() for operation in df["operation"]] == ["append", "overwrite", "append"] - for manifest_list in df['manifest_list']: + for manifest_list in df["manifest_list"]: assert manifest_list.as_py().startswith("s3://") - assert df['summary'][0].as_py() == [ - ('added-files-size', '5459'), - ('added-data-files', '1'), - ('added-records', '3'), - 
('total-data-files', '1'), - ('total-delete-files', '0'), - ('total-records', '3'), - ('total-files-size', '5459'), - ('total-position-deletes', '0'), - ('total-equality-deletes', '0'), + assert df["summary"][0].as_py() == [ + ("added-files-size", "5459"), + ("added-data-files", "1"), + ("added-records", "3"), + ("total-data-files", "1"), + ("total-delete-files", "0"), + ("total-records", "3"), + ("total-files-size", "5459"), + ("total-position-deletes", "0"), + ("total-equality-deletes", "0"), ] lhs = spark.table(f"{identifier}.snapshots").toPandas() rhs = df.to_pandas() for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): - if column == 'summary': + if column == "summary": # Arrow returns a list of tuples, instead of a dict right = dict(right) @@ -150,29 +150,29 @@ def test_inspect_entries( def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> None: assert df.column_names == [ - 'status', - 'snapshot_id', - 'sequence_number', - 'file_sequence_number', - 'data_file', - 'readable_metrics', + "status", + "snapshot_id", + "sequence_number", + "file_sequence_number", + "data_file", + "readable_metrics", ] # Make sure that they are filled properly - for int_column in ['status', 'snapshot_id', 'sequence_number', 'file_sequence_number']: + for int_column in ["status", "snapshot_id", "sequence_number", "file_sequence_number"]: for value in df[int_column]: assert isinstance(value.as_py(), int) - for snapshot_id in df['snapshot_id']: + for snapshot_id in df["snapshot_id"]: assert isinstance(snapshot_id.as_py(), int) lhs = df.to_pandas() rhs = spark_df.toPandas() for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): - if column == 'data_file': + if column == "data_file": for df_column in left.keys(): - if df_column == 'partition': + if df_column == "partition": # Spark leaves out the partition if the table is unpartitioned continue @@ -183,20 +183,20 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non df_lhs = dict(df_lhs) assert df_lhs == df_rhs, f"Difference in data_file column {df_column}: {df_lhs} != {df_rhs}" - elif column == 'readable_metrics': + elif column == "readable_metrics": assert list(left.keys()) == [ - 'bool', - 'string', - 'string_long', - 'int', - 'long', - 'float', - 'double', - 'timestamp', - 'timestamptz', - 'date', - 'binary', - 'fixed', + "bool", + "string", + "string_long", + "int", + "long", + "float", + "double", + "timestamp", + "timestamptz", + "date", + "binary", + "fixed", ] assert left.keys() == right.keys() @@ -205,18 +205,18 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non rm_lhs = left[rm_column] rm_rhs = right[rm_column] - assert rm_lhs['column_size'] == rm_rhs['column_size'] - assert rm_lhs['value_count'] == rm_rhs['value_count'] - assert rm_lhs['null_value_count'] == rm_rhs['null_value_count'] - assert rm_lhs['nan_value_count'] == rm_rhs['nan_value_count'] + assert rm_lhs["column_size"] == rm_rhs["column_size"] + assert rm_lhs["value_count"] == rm_rhs["value_count"] + assert rm_lhs["null_value_count"] == rm_rhs["null_value_count"] + assert rm_lhs["nan_value_count"] == rm_rhs["nan_value_count"] - if rm_column == 'timestamptz': + if rm_column == "timestamptz": # PySpark does not correctly set the timstamptz - rm_rhs['lower_bound'] = rm_rhs['lower_bound'].replace(tzinfo=pytz.utc) - rm_rhs['upper_bound'] = rm_rhs['upper_bound'].replace(tzinfo=pytz.utc) + rm_rhs["lower_bound"] = 
rm_rhs["lower_bound"].replace(tzinfo=pytz.utc) + rm_rhs["upper_bound"] = rm_rhs["upper_bound"].replace(tzinfo=pytz.utc) - assert rm_lhs['lower_bound'] == rm_rhs['lower_bound'] - assert rm_lhs['upper_bound'] == rm_rhs['upper_bound'] + assert rm_lhs["lower_bound"] == rm_rhs["lower_bound"] + assert rm_lhs["upper_bound"] == rm_rhs["upper_bound"] else: assert left == right, f"Difference in column {column}: {left} != {right}" @@ -265,8 +265,8 @@ def test_inspect_entries_partitioned(spark: SparkSession, session_catalog: Catal df = session_catalog.load_table(identifier).inspect.entries() - assert df.to_pydict()['data_file'][0]['partition'] == {'dt_day': date(2021, 2, 1), 'dt_month': None} - assert df.to_pydict()['data_file'][1]['partition'] == {'dt_day': None, 'dt_month': 612} + assert df.to_pydict()["data_file"][0]["partition"] == {"dt_day": date(2021, 2, 1), "dt_month": None} + assert df.to_pydict()["data_file"][1]["partition"] == {"dt_day": None, "dt_month": 612} @pytest.mark.integration @@ -301,21 +301,21 @@ def test_inspect_refs( df = tbl.refresh().inspect.refs() assert df.column_names == [ - 'name', - 'type', - 'snapshot_id', - 'max_reference_age_in_ms', - 'min_snapshots_to_keep', - 'max_snapshot_age_in_ms', + "name", + "type", + "snapshot_id", + "max_reference_age_in_ms", + "min_snapshots_to_keep", + "max_snapshot_age_in_ms", ] - assert [name.as_py() for name in df['name']] == ['testBranch', 'main', 'testTag'] - assert [ref_type.as_py() for ref_type in df['type']] == ['BRANCH', 'BRANCH', 'TAG'] + assert [name.as_py() for name in df["name"]] == ["testBranch", "main", "testTag"] + assert [ref_type.as_py() for ref_type in df["type"]] == ["BRANCH", "BRANCH", "TAG"] - for snapshot_id in df['snapshot_id']: + for snapshot_id in df["snapshot_id"]: assert isinstance(snapshot_id.as_py(), int) - for int_column in ['max_reference_age_in_ms', 'min_snapshots_to_keep', 'max_snapshot_age_in_ms']: + for int_column in ["max_reference_age_in_ms", "min_snapshots_to_keep", "max_snapshot_age_in_ms"]: for value in df[int_column]: assert isinstance(value.as_py(), int) or not value.as_py() @@ -343,28 +343,28 @@ def test_inspect_partitions_unpartitioned( df = tbl.inspect.partitions() assert df.column_names == [ - 'record_count', - 'file_count', - 'total_data_file_size_in_bytes', - 'position_delete_record_count', - 'position_delete_file_count', - 'equality_delete_record_count', - 'equality_delete_file_count', - 'last_updated_at', - 'last_updated_snapshot_id', + "record_count", + "file_count", + "total_data_file_size_in_bytes", + "position_delete_record_count", + "position_delete_file_count", + "equality_delete_record_count", + "equality_delete_file_count", + "last_updated_at", + "last_updated_snapshot_id", ] - for last_updated_at in df['last_updated_at']: + for last_updated_at in df["last_updated_at"]: assert isinstance(last_updated_at.as_py(), datetime) int_cols = [ - 'record_count', - 'file_count', - 'total_data_file_size_in_bytes', - 'position_delete_record_count', - 'position_delete_file_count', - 'equality_delete_record_count', - 'equality_delete_file_count', - 'last_updated_snapshot_id', + "record_count", + "file_count", + "total_data_file_size_in_bytes", + "position_delete_record_count", + "position_delete_file_count", + "equality_delete_record_count", + "equality_delete_file_count", + "last_updated_snapshot_id", ] for column in int_cols: for value in df[column]: @@ -434,8 +434,8 @@ def test_inspect_partitions_partitioned(spark: SparkSession, session_catalog: Ca ) def check_pyiceberg_df_equals_spark_df(df: 
pa.Table, spark_df: DataFrame) -> None: - lhs = df.to_pandas().sort_values('spec_id') - rhs = spark_df.toPandas().sort_values('spec_id') + lhs = df.to_pandas().sort_values("spec_id") + rhs = spark_df.toPandas().sort_values("spec_id") for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): assert left == right, f"Difference in column {column}: {left} != {right}" @@ -445,3 +445,86 @@ def check_pyiceberg_df_equals_spark_df(df: pa.Table, spark_df: DataFrame) -> Non df = tbl.inspect.partitions(snapshot_id=snapshot.snapshot_id) spark_df = spark.sql(f"SELECT * FROM {identifier}.partitions VERSION AS OF {snapshot.snapshot_id}") check_pyiceberg_df_equals_spark_df(df, spark_df) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_inspect_manifests(spark: SparkSession, session_catalog: Catalog, format_version: int) -> None: + identifier = "default.table_metadata_manifests" + try: + session_catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + spark.sql( + f""" + CREATE TABLE {identifier} ( + id int, + data string + ) + PARTITIONED BY (data) + """ + ) + + spark.sql( + f""" + INSERT INTO {identifier} VALUES (1, "a") + """ + ) + + spark.sql( + f""" + INSERT INTO {identifier} VALUES (2, "b") + """ + ) + + df = session_catalog.load_table(identifier).inspect.manifests() + + assert df.column_names == [ + "content", + "path", + "length", + "partition_spec_id", + "added_snapshot_id", + "added_data_files_count", + "existing_data_files_count", + "deleted_data_files_count", + "added_delete_files_count", + "existing_delete_files_count", + "deleted_delete_files_count", + "partition_summaries", + ] + + int_cols = [ + "content", + "length", + "partition_spec_id", + "added_snapshot_id", + "added_data_files_count", + "existing_data_files_count", + "deleted_data_files_count", + "added_delete_files_count", + "existing_delete_files_count", + "deleted_delete_files_count", + ] + + for column in int_cols: + for value in df[column]: + assert isinstance(value.as_py(), int) + + for value in df["path"]: + assert isinstance(value.as_py(), str) + + for value in df["partition_summaries"]: + assert isinstance(value.as_py(), list) + for row in value: + assert isinstance(row["contains_null"].as_py(), bool) + assert isinstance(row["contains_nan"].as_py(), (bool, type(None))) + assert isinstance(row["lower_bound"].as_py(), (str, type(None))) + assert isinstance(row["upper_bound"].as_py(), (str, type(None))) + + lhs = spark.table(f"{identifier}.manifests").toPandas() + rhs = df.to_pandas() + for column in df.column_names: + for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): + assert left == right, f"Difference in column {column}: {left} != {right}" diff --git a/tests/integration/test_partition_evolution.py b/tests/integration/test_partition_evolution.py index 785b34b82c..5cc7512f4a 100644 --- a/tests/integration/test_partition_evolution.py +++ b/tests/integration/test_partition_evolution.py @@ -73,7 +73,7 @@ def _create_table_with_schema(catalog: Catalog, schema: Schema, format_version: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_identity_partition(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _simple_table(catalog, table_schema_simple) 
simple_table.update_spec().add_identity("foo").commit() @@ -85,7 +85,7 @@ def test_add_identity_partition(catalog: Catalog, table_schema_simple: Schema) - @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_year(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", YearTransform(), "year_transform").commit() @@ -93,7 +93,7 @@ def test_add_year(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_month(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", MonthTransform(), "month_transform").commit() @@ -101,7 +101,7 @@ def test_add_month(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_day(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", DayTransform(), "day_transform").commit() @@ -109,7 +109,7 @@ def test_add_day(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_hour(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", HourTransform(), "hour_transform").commit() @@ -117,7 +117,7 @@ def test_add_hour(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") simple_table.update_spec().add_field("foo", BucketTransform(12), "bucket_transform").commit() @@ -125,7 +125,7 @@ def test_add_bucket(catalog: Catalog, table_schema_simple: Schema) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None: simple_table = _create_table_with_schema(catalog, table_schema_simple, "1") simple_table.update_spec().add_field("foo", TruncateTransform(1), "truncate_transform").commit() @@ -135,7 +135,7 @@ def test_add_truncate(catalog: Catalog, table_schema_simple: Schema) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", 
[pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_multiple_adds(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_identity("id").add_field("event_ts", HourTransform(), "hourly_partitioned").add_field( @@ -153,7 +153,7 @@ def test_multiple_adds(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_hour_to_day(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("event_ts", DayTransform(), "daily_partitioned").commit() @@ -169,7 +169,7 @@ def test_add_hour_to_day(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_add_multiple_buckets(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_field("id", BucketTransform(16)).add_field("id", BucketTransform(4)).commit() @@ -184,7 +184,7 @@ def test_add_multiple_buckets(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_identity(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_identity("id").commit() @@ -192,12 +192,12 @@ def test_remove_identity(catalog: Catalog) -> None: assert len(table.specs()) == 3 assert table.spec().spec_id == 2 assert table.spec() == PartitionSpec( - PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name='id'), spec_id=2 + PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="id"), spec_id=2 ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_identity_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) table_v2.update_spec().add_identity("id").commit() @@ -208,7 +208,7 @@ def test_remove_identity_v2(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_bucket(catalog: Catalog) -> None: table = _table(catalog) with table.update_spec() as update: @@ -223,13 +223,13 @@ def test_remove_bucket(catalog: Catalog) -> None: 1001, 2, 1001, - PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name='bucketed_id'), - PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name='day_ts'), + PartitionField(source_id=1, field_id=1000, transform=VoidTransform(), name="bucketed_id"), + PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name="day_ts"), ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', 
[pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_bucket_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) with table_v2.update_spec() as update: @@ -239,12 +239,12 @@ def test_remove_bucket_v2(catalog: Catalog) -> None: remove.remove_field("bucketed_id") assert len(table_v2.specs()) == 3 _validate_new_partition_fields( - table_v2, 1001, 2, 1001, PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name='day_ts') + table_v2, 1001, 2, 1001, PartitionField(source_id=2, field_id=1001, transform=DayTransform(), name="day_ts") ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_day(catalog: Catalog) -> None: table = _table(catalog) with table.update_spec() as update: @@ -259,13 +259,13 @@ def test_remove_day(catalog: Catalog) -> None: 1001, 2, 1001, - PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name='bucketed_id'), - PartitionField(source_id=2, field_id=1001, transform=VoidTransform(), name='day_ts'), + PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="bucketed_id"), + PartitionField(source_id=2, field_id=1001, transform=VoidTransform(), name="day_ts"), ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_remove_day_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) with table_v2.update_spec() as update: @@ -275,12 +275,12 @@ def test_remove_day_v2(catalog: Catalog) -> None: remove.remove_field("day_ts") assert len(table_v2.specs()) == 3 _validate_new_partition_fields( - table_v2, 1000, 2, 1001, PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name='bucketed_id') + table_v2, 1000, 2, 1001, PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="bucketed_id") ) @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_rename(catalog: Catalog) -> None: table = _table(catalog) table.update_spec().add_identity("id").commit() @@ -291,7 +291,7 @@ def test_rename(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_add_and_remove(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -300,7 +300,7 @@ def test_cannot_add_and_remove(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), 
pytest.lazy_fixture("session_catalog")]) def test_cannot_add_redundant_time_partition(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -311,7 +311,7 @@ def test_cannot_add_redundant_time_partition(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_delete_and_rename(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -321,7 +321,7 @@ def test_cannot_delete_and_rename(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_rename_and_delete(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -331,7 +331,7 @@ def test_cannot_rename_and_delete(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_add_same_tranform_for_same_field(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -342,7 +342,7 @@ def test_cannot_add_same_tranform_for_same_field(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_add_same_field_multiple_times(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -353,7 +353,7 @@ def test_cannot_add_same_field_multiple_times(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_cannot_add_multiple_specs_same_name(catalog: Catalog) -> None: table = _table(catalog) with pytest.raises(ValueError) as exc_info: @@ -364,7 +364,7 @@ def test_cannot_add_multiple_specs_same_name(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_change_specs_and_schema_transaction(catalog: Catalog) -> None: table = _table(catalog) with table.transaction() as transaction: @@ -387,17 +387,17 @@ def test_change_specs_and_schema_transaction(catalog: Catalog) -> None: ) assert table.schema() == Schema( - NestedField(field_id=1, name='id', field_type=LongType(), required=False), - NestedField(field_id=2, name='event_ts', field_type=TimestampType(), required=False), - NestedField(field_id=3, name='str', field_type=StringType(), required=False), - 
NestedField(field_id=4, name='col_string', field_type=StringType(), required=False), + NestedField(field_id=1, name="id", field_type=LongType(), required=False), + NestedField(field_id=2, name="event_ts", field_type=TimestampType(), required=False), + NestedField(field_id=3, name="str", field_type=StringType(), required=False), + NestedField(field_id=4, name="col_string", field_type=StringType(), required=False), identifier_field_ids=[], ) assert table.schema().schema_id == 1 @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_multiple_adds_and_remove_v1(catalog: Catalog) -> None: table = _table(catalog) with table.update_spec() as update: @@ -419,7 +419,7 @@ def test_multiple_adds_and_remove_v1(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_multiple_adds_and_remove_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) with table_v2.update_spec() as update: @@ -433,7 +433,7 @@ def test_multiple_adds_and_remove_v2(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_multiple_remove_and_add_reuses_v2(catalog: Catalog) -> None: table_v2 = _table_v2(catalog) with table_v2.update_spec() as update: diff --git a/tests/integration/test_partitioning_key.py b/tests/integration/test_partitioning_key.py index d89ecaf202..29f664909c 100644 --- a/tests/integration/test_partitioning_key.py +++ b/tests/integration/test_partitioning_key.py @@ -328,8 +328,8 @@ ), ( [PartitionField(source_id=11, field_id=1001, transform=IdentityTransform(), name="binary_field")], - [b'example'], - Record(binary_field=b'example'), + [b"example"], + Record(binary_field=b"example"), "binary_field=ZXhhbXBsZQ%3D%3D", f"""CREATE TABLE {identifier} ( binary_field binary, @@ -347,8 +347,8 @@ ), ( [PartitionField(source_id=13, field_id=1001, transform=IdentityTransform(), name="decimal_field")], - [Decimal('123.45')], - Record(decimal_field=Decimal('123.45')), + [Decimal("123.45")], + Record(decimal_field=Decimal("123.45")), "decimal_field=123.45", f"""CREATE TABLE {identifier} ( decimal_field decimal(5,2), @@ -638,8 +638,8 @@ ), ( [PartitionField(source_id=13, field_id=1001, transform=TruncateTransform(width=5), name="decimal_field_trunc")], - [Decimal('678.93')], - Record(decimal_field_trunc=Decimal('678.90')), + [Decimal("678.93")], + Record(decimal_field_trunc=Decimal("678.90")), "decimal_field_trunc=678.90", # Assuming truncation width of 1 leads to truncating to 670 f"""CREATE TABLE {identifier} ( decimal_field decimal(5,2), @@ -657,8 +657,8 @@ ), ( [PartitionField(source_id=11, field_id=1001, transform=TruncateTransform(10), name="binary_field_trunc")], - [b'HELLOICEBERG'], - Record(binary_field_trunc=b'HELLOICEBE'), + [b"HELLOICEBERG"], + Record(binary_field_trunc=b"HELLOICEBE"), "binary_field_trunc=SEVMTE9JQ0VCRQ%3D%3D", f"""CREATE TABLE {identifier} ( binary_field binary, diff --git 
a/tests/integration/test_reads.py b/tests/integration/test_reads.py index ee9b17e438..80a6f18632 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -17,6 +17,7 @@ # pylint:disable=redefined-outer-name import math +import time import uuid from urllib.parse import urlparse @@ -48,8 +49,9 @@ StringType, TimestampType, ) +from pyiceberg.utils.concurrent import ExecutorFactory -DEFAULT_PROPERTIES = {'write.parquet.compression-codec': 'zstd'} +DEFAULT_PROPERTIES = {"write.parquet.compression-codec": "zstd"} TABLE_NAME = ("default", "t1") @@ -72,7 +74,7 @@ def create_table(catalog: Catalog) -> Table: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_table_properties(catalog: Catalog) -> None: table = create_table(catalog) @@ -102,7 +104,7 @@ def test_table_properties(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_table_properties_dict(catalog: Catalog) -> None: table = create_table(catalog) @@ -132,7 +134,7 @@ def test_table_properties_dict(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_table_properties_error(catalog: Catalog) -> None: table = create_table(catalog) properties = {"abc": "def"} @@ -142,7 +144,7 @@ def test_table_properties_error(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_nan(catalog: Catalog) -> None: table_test_null_nan = catalog.load_table("default.test_null_nan") arrow_table = table_test_null_nan.scan(row_filter=IsNaN("col_numeric"), selected_fields=("idx", "col_numeric")).to_arrow() @@ -152,7 +154,7 @@ def test_pyarrow_nan(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_nan_rewritten(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") arrow_table = table_test_null_nan_rewritten.scan( @@ -164,7 +166,7 @@ def test_pyarrow_nan_rewritten(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) @pytest.mark.skip(reason="Fixing issues with NaN's: https://github.com/apache/arrow/issues/34162") def test_pyarrow_not_nan_count(catalog: Catalog) -> None: table_test_null_nan = 
catalog.load_table("default.test_null_nan") @@ -173,7 +175,7 @@ def test_pyarrow_not_nan_count(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_duckdb_nan(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") con = table_test_null_nan_rewritten.scan().to_duckdb("table_test_null_nan") @@ -183,7 +185,7 @@ def test_duckdb_nan(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_limit(catalog: Catalog) -> None: table_test_limit = catalog.load_table("default.test_limit") limited_result = table_test_limit.scan(selected_fields=("idx",), limit=1).to_arrow() @@ -198,7 +200,7 @@ def test_pyarrow_limit(catalog: Catalog) -> None: @pytest.mark.integration @pytest.mark.filterwarnings("ignore") -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_daft_nan(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") df = table_test_null_nan_rewritten.to_daft() @@ -207,7 +209,7 @@ def test_daft_nan(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_daft_nan_rewritten(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") df = table_test_null_nan_rewritten.to_daft() @@ -220,7 +222,7 @@ def test_daft_nan_rewritten(catalog: Catalog) -> None: @pytest.mark.integration @pytest.mark.filterwarnings("ignore") -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_ray_nan(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") ray_dataset = table_test_null_nan_rewritten.scan().to_ray() @@ -229,7 +231,7 @@ def test_ray_nan(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_ray_nan_rewritten(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") ray_dataset = table_test_null_nan_rewritten.scan( @@ -241,7 +243,7 @@ def test_ray_nan_rewritten(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) 
+@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) @pytest.mark.skip(reason="Fixing issues with NaN's: https://github.com/apache/arrow/issues/34162") def test_ray_not_nan_count(catalog: Catalog) -> None: table_test_null_nan_rewritten = catalog.load_table("default.test_null_nan_rewritten") @@ -250,7 +252,7 @@ def test_ray_not_nan_count(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_ray_all_types(catalog: Catalog) -> None: table_test_all_types = catalog.load_table("default.test_all_types") ray_dataset = table_test_all_types.scan().to_ray() @@ -260,7 +262,7 @@ def test_ray_all_types(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_to_iceberg_all_types(catalog: Catalog) -> None: table_test_all_types = catalog.load_table("default.test_all_types") fs = S3FileSystem( @@ -279,7 +281,7 @@ def test_pyarrow_to_iceberg_all_types(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_deletes(catalog: Catalog) -> None: # number, letter # (1, 'a'), @@ -316,7 +318,7 @@ def test_pyarrow_deletes(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_pyarrow_deletes_double(catalog: Catalog) -> None: # number, letter # (1, 'a'), @@ -353,7 +355,7 @@ def test_pyarrow_deletes_double(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_partitioned_tables(catalog: Catalog) -> None: for table_name, predicate in [ ("test_partitioned_by_identity", "ts >= '2023-03-05T00:00:00+00:00'"), @@ -370,7 +372,7 @@ def test_partitioned_tables(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_unpartitioned_uuid_table(catalog: Catalog) -> None: unpartitioned_uuid = catalog.load_table("default.test_uuid_and_fixed_unpartitioned") arrow_table_eq = unpartitioned_uuid.scan(row_filter="uuid_col == '102cb62f-e6f8-4eb0-9973-d9b012ff0967'").to_arrow() @@ -387,7 +389,7 @@ def test_unpartitioned_uuid_table(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), 
pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_unpartitioned_fixed_table(catalog: Catalog) -> None: fixed_table = catalog.load_table("default.test_uuid_and_fixed_unpartitioned") arrow_table_eq = fixed_table.scan(row_filter=EqualTo("fixed_col", b"1234567890123456789012345")).to_arrow() @@ -406,7 +408,7 @@ def test_unpartitioned_fixed_table(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_scan_tag(catalog: Catalog) -> None: test_positional_mor_deletes = catalog.load_table("default.test_positional_mor_deletes") arrow_table = test_positional_mor_deletes.scan().use_ref("tag_12").to_arrow() @@ -414,7 +416,7 @@ def test_scan_tag(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_scan_branch(catalog: Catalog) -> None: test_positional_mor_deletes = catalog.load_table("default.test_positional_mor_deletes") arrow_table = test_positional_mor_deletes.scan().use_ref("without_5").to_arrow() @@ -422,21 +424,21 @@ def test_scan_branch(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_filter_on_new_column(catalog: Catalog) -> None: test_table_add_column = catalog.load_table("default.test_table_add_column") arrow_table = test_table_add_column.scan(row_filter="b == '2'").to_arrow() - assert arrow_table["b"].to_pylist() == ['2'] + assert arrow_table["b"].to_pylist() == ["2"] arrow_table = test_table_add_column.scan(row_filter="b is not null").to_arrow() - assert arrow_table["b"].to_pylist() == ['2'] + assert arrow_table["b"].to_pylist() == ["2"] arrow_table = test_table_add_column.scan(row_filter="b is null").to_arrow() assert arrow_table["b"].to_pylist() == [None] @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_upgrade_table_version(catalog: Catalog) -> None: table_test_table_version = catalog.load_table("default.test_table_version") @@ -464,7 +466,7 @@ def test_upgrade_table_version(catalog: Catalog) -> None: @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_sanitize_character(catalog: Catalog) -> None: table_test_table_sanitized_character = catalog.load_table("default.test_table_sanitized_character") arrow_table = table_test_table_sanitized_character.scan().to_arrow() @@ -474,7 +476,7 @@ def test_sanitize_character(catalog: Catalog) -> None: 
@pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_null_list_and_map(catalog: Catalog) -> None: table_test_empty_list_and_map = catalog.load_table("default.test_table_empty_list_and_map") arrow_table = table_test_empty_list_and_map.scan().to_arrow() @@ -483,7 +485,7 @@ def test_null_list_and_map(catalog: Catalog) -> None: # This should be: # assert arrow_table["col_list_with_struct"].to_pylist() == [None, [{'test': 1}]] # Once https://github.com/apache/arrow/issues/38809 has been fixed - assert arrow_table["col_list_with_struct"].to_pylist() == [[], [{'test': 1}]] + assert arrow_table["col_list_with_struct"].to_pylist() == [[], [{"test": 1}]] @pytest.mark.integration @@ -506,3 +508,32 @@ def test_hive_locking(session_catalog_hive: HiveCatalog) -> None: table.transaction().set_properties(lock="fail").commit_transaction() finally: open_client.unlock(UnlockRequest(lock.lockid)) + + +@pytest.mark.integration +def test_hive_locking_with_retry(session_catalog_hive: HiveCatalog) -> None: + table = create_table(session_catalog_hive) + database_name: str + table_name: str + _, database_name, table_name = table.identifier + session_catalog_hive._lock_check_min_wait_time = 0.1 + session_catalog_hive._lock_check_max_wait_time = 0.5 + session_catalog_hive._lock_check_retries = 5 + + hive_client: _HiveClient = _HiveClient(session_catalog_hive.properties["uri"]) + + executor = ExecutorFactory.get_or_create() + + with hive_client as open_client: + + def another_task() -> None: + lock: LockResponse = open_client.lock(session_catalog_hive._create_lock_request(database_name, table_name)) + time.sleep(1) + open_client.unlock(UnlockRequest(lock.lockid)) + + # test transaction commit with concurrent locking + executor.submit(another_task) + time.sleep(0.5) + + table.transaction().set_properties(lock="xxx").commit_transaction() + assert table.properties.get("lock") == "xxx" diff --git a/tests/integration/test_rest_manifest.py b/tests/integration/test_rest_manifest.py index 8191209ae6..82c41cfd93 100644 --- a/tests/integration/test_rest_manifest.py +++ b/tests/integration/test_rest_manifest.py @@ -17,6 +17,7 @@ # pylint:disable=redefined-outer-name import inspect +from copy import copy from enum import Enum from tempfile import TemporaryDirectory from typing import Any @@ -26,7 +27,7 @@ from pyiceberg.catalog import Catalog, load_catalog from pyiceberg.io.pyarrow import PyArrowFileIO -from pyiceberg.manifest import DataFile, ManifestEntry, write_manifest +from pyiceberg.manifest import DataFile, write_manifest from pyiceberg.table import Table from pyiceberg.utils.lazydict import LazyDict @@ -99,11 +100,11 @@ def test_write_sample_manifest(table_test_all_types: Table) -> None: sort_order_id=entry.data_file.sort_order_id, spec_id=entry.data_file.spec_id, ) - wrapped_entry_v2 = ManifestEntry(*entry.record_fields()) + wrapped_entry_v2 = copy(entry) wrapped_entry_v2.data_file = wrapped_data_file_v2_debug wrapped_entry_v2_dict = todict(wrapped_entry_v2) # This one should not be written - del wrapped_entry_v2_dict['data_file']['spec_id'] + del wrapped_entry_v2_dict["data_file"]["spec_id"] with TemporaryDirectory() as tmpdir: tmp_avro_file = tmpdir + "/test_write_manifest.avro" diff --git a/tests/integration/test_rest_schema.py b/tests/integration/test_rest_schema.py index 
ac5d1ce050..f4ab98a883 100644 --- a/tests/integration/test_rest_schema.py +++ b/tests/integration/test_rest_schema.py @@ -358,16 +358,16 @@ def test_revert_changes(simple_table: Table, table_schema_simple: Schema) -> Non assert simple_table.schemas() == { 0: Schema( - NestedField(field_id=1, name='foo', field_type=StringType(), required=False), - NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False), + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), identifier_field_ids=[2], ), 1: Schema( - NestedField(field_id=1, name='foo', field_type=StringType(), required=False), - NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False), - NestedField(field_id=4, name='data', field_type=IntegerType(), required=False), + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), + NestedField(field_id=4, name="data", field_type=IntegerType(), required=False), identifier_field_ids=[2], ), } @@ -685,9 +685,9 @@ def test_rename_simple(simple_table: Table) -> None: # Check that the name mapping gets updated assert simple_table.name_mapping() == NameMapping([ - MappedField(field_id=1, names=['foo', 'vo']), - MappedField(field_id=2, names=['bar', 'var']), - MappedField(field_id=3, names=['baz']), + MappedField(field_id=1, names=["foo", "vo"]), + MappedField(field_id=2, names=["bar", "var"]), + MappedField(field_id=3, names=["baz"]), ]) @@ -719,7 +719,7 @@ def test_rename_simple_nested(catalog: Catalog) -> None: # Check that the name mapping gets updated assert tbl.name_mapping() == NameMapping([ - MappedField(field_id=1, names=['foo'], fields=[MappedField(field_id=2, names=['bar', 'vo'])]), + MappedField(field_id=1, names=["foo"], fields=[MappedField(field_id=2, names=["bar", "vo"])]), ]) diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py index d84b9745a7..76d559ca57 100644 --- a/tests/integration/test_writes/test_partitioned_writes.py +++ b/tests/integration/test_writes/test_partitioned_writes.py @@ -16,6 +16,10 @@ # under the License. 
# pylint:disable=redefined-outer-name + +from datetime import date +from typing import Any, Set + import pyarrow as pa import pytest from pyspark.sql import SparkSession @@ -23,12 +27,14 @@ from pyiceberg.catalog import Catalog from pyiceberg.exceptions import NoSuchTableError from pyiceberg.partitioning import PartitionField, PartitionSpec +from pyiceberg.schema import Schema from pyiceberg.transforms import ( BucketTransform, DayTransform, HourTransform, IdentityTransform, MonthTransform, + Transform, TruncateTransform, YearTransform, ) @@ -38,7 +44,7 @@ @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", 'timestamp', 'timestamptz', 'binary'] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamp", "timestamptz", "binary"] ) @pytest.mark.parametrize("format_version", [1, 2]) def test_query_filter_null_partitioned( @@ -71,7 +77,7 @@ def test_query_filter_null_partitioned( @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", 'timestamp', 'timestamptz', 'binary'] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamp", "timestamptz", "binary"] ) @pytest.mark.parametrize("format_version", [1, 2]) def test_query_filter_without_data_partitioned( @@ -103,7 +109,7 @@ def test_query_filter_without_data_partitioned( @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", 'timestamp', 'timestamptz', 'binary'] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamp", "timestamptz", "binary"] ) @pytest.mark.parametrize("format_version", [1, 2]) def test_query_filter_only_nulls_partitioned( @@ -135,7 +141,7 @@ def test_query_filter_only_nulls_partitioned( @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", "timestamptz", "timestamp", "binary"] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamptz", "timestamp", "binary"] ) @pytest.mark.parametrize("format_version", [1, 2]) def test_query_filter_appended_null_partitioned( @@ -174,7 +180,7 @@ def test_query_filter_appended_null_partitioned( @pytest.mark.integration @pytest.mark.parametrize( - "part_col", ['int', 'bool', 'string', "string_long", "long", "float", "double", "date", "timestamptz", "timestamp", "binary"] + "part_col", ["int", "bool", "string", "string_long", "long", "float", "double", "date", "timestamptz", "timestamp", "binary"] ) def test_query_filter_v1_v2_append_null( session_catalog: Catalog, spark: SparkSession, arrow_table_with_null: pa.Table, part_col: str @@ -225,7 +231,7 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro identifier=identifier, schema=TABLE_SCHEMA, partition_spec=PartitionSpec(PartitionField(source_id=4, field_id=1001, transform=IdentityTransform(), name="int")), - properties={'format-version': '2'}, + properties={"format-version": "2"}, ) tbl.append(arrow_table_with_null) @@ -240,33 +246,33 @@ def test_summaries_with_null(spark: SparkSession, session_catalog: Catalog, arro ).collect() operations = [row.operation for row in rows] - assert operations == ['append', 'append'] + assert operations == ["append", "append"] summaries = [row.summary for row in rows] 
assert summaries[0] == { - 'changed-partition-count': '3', - 'added-data-files': '3', - 'added-files-size': '15029', - 'added-records': '3', - 'total-data-files': '3', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '15029', - 'total-position-deletes': '0', - 'total-records': '3', + "changed-partition-count": "3", + "added-data-files": "3", + "added-files-size": "15029", + "added-records": "3", + "total-data-files": "3", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "15029", + "total-position-deletes": "0", + "total-records": "3", } assert summaries[1] == { - 'changed-partition-count': '3', - 'added-data-files': '3', - 'added-files-size': '15029', - 'added-records': '3', - 'total-data-files': '6', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '30058', - 'total-position-deletes': '0', - 'total-records': '6', + "changed-partition-count": "3", + "added-data-files": "3", + "added-files-size": "15029", + "added-records": "3", + "total-data-files": "6", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "30058", + "total-position-deletes": "0", + "total-records": "6", } @@ -284,7 +290,7 @@ def test_data_files_with_table_partitioned_with_null( identifier=identifier, schema=TABLE_SCHEMA, partition_spec=PartitionSpec(PartitionField(source_id=4, field_id=1001, transform=IdentityTransform(), name="int")), - properties={'format-version': '1'}, + properties={"format-version": "1"}, ) tbl.append(arrow_table_with_null) @@ -320,7 +326,7 @@ def test_invalid_arguments(spark: SparkSession, session_catalog: Catalog) -> Non identifier=identifier, schema=TABLE_SCHEMA, partition_spec=PartitionSpec(PartitionField(source_id=4, field_id=1001, transform=IdentityTransform(), name="int")), - properties={'format-version': '1'}, + properties={"format-version": "1"}, ) with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"): @@ -351,18 +357,6 @@ def test_invalid_arguments(spark: SparkSession, session_catalog: Catalog) -> Non (PartitionSpec(PartitionField(source_id=5, field_id=1001, transform=TruncateTransform(2), name="long_trunc"))), (PartitionSpec(PartitionField(source_id=2, field_id=1001, transform=TruncateTransform(2), name="string_trunc"))), (PartitionSpec(PartitionField(source_id=11, field_id=1001, transform=TruncateTransform(2), name="binary_trunc"))), - (PartitionSpec(PartitionField(source_id=8, field_id=1001, transform=YearTransform(), name="timestamp_year"))), - (PartitionSpec(PartitionField(source_id=9, field_id=1001, transform=YearTransform(), name="timestamptz_year"))), - (PartitionSpec(PartitionField(source_id=10, field_id=1001, transform=YearTransform(), name="date_year"))), - (PartitionSpec(PartitionField(source_id=8, field_id=1001, transform=MonthTransform(), name="timestamp_month"))), - (PartitionSpec(PartitionField(source_id=9, field_id=1001, transform=MonthTransform(), name="timestamptz_month"))), - (PartitionSpec(PartitionField(source_id=10, field_id=1001, transform=MonthTransform(), name="date_month"))), - (PartitionSpec(PartitionField(source_id=8, field_id=1001, transform=DayTransform(), name="timestamp_day"))), - (PartitionSpec(PartitionField(source_id=9, field_id=1001, transform=DayTransform(), name="timestamptz_day"))), - (PartitionSpec(PartitionField(source_id=10, field_id=1001, transform=DayTransform(), name="date_day"))), - (PartitionSpec(PartitionField(source_id=8, field_id=1001, transform=HourTransform(), 
name="timestamp_hour"))), - (PartitionSpec(PartitionField(source_id=9, field_id=1001, transform=HourTransform(), name="timestamptz_hour"))), - (PartitionSpec(PartitionField(source_id=10, field_id=1001, transform=HourTransform(), name="date_hour"))), ], ) def test_unsupported_transform( @@ -379,8 +373,189 @@ def test_unsupported_transform( identifier=identifier, schema=TABLE_SCHEMA, partition_spec=spec, - properties={'format-version': '1'}, + properties={"format-version": "1"}, ) - with pytest.raises(ValueError, match="All transforms are not supported.*"): + with pytest.raises( + ValueError, + match="Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: *", + ): tbl.append(arrow_table_with_null) + + +@pytest.mark.integration +@pytest.mark.parametrize( + "transform,expected_rows", + [ + pytest.param(YearTransform(), 2, id="year_transform"), + pytest.param(MonthTransform(), 3, id="month_transform"), + pytest.param(DayTransform(), 3, id="day_transform"), + ], +) +@pytest.mark.parametrize("part_col", ["date", "timestamp", "timestamptz"]) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_append_ymd_transform_partitioned( + session_catalog: Catalog, + spark: SparkSession, + arrow_table_with_null: pa.Table, + transform: Transform[Any, Any], + expected_rows: int, + part_col: str, + format_version: int, +) -> None: + # Given + identifier = f"default.arrow_table_v{format_version}_with_{str(transform)}_partition_on_col_{part_col}" + nested_field = TABLE_SCHEMA.find_field(part_col) + partition_spec = PartitionSpec( + PartitionField(source_id=nested_field.field_id, field_id=1001, transform=transform, name=part_col) + ) + + # When + tbl = _create_table( + session_catalog=session_catalog, + identifier=identifier, + properties={"format-version": str(format_version)}, + data=[arrow_table_with_null], + partition_spec=partition_spec, + ) + + # Then + assert tbl.format_version == format_version, f"Expected v{format_version}, got: v{tbl.format_version}" + df = spark.table(identifier) + assert df.count() == 3, f"Expected 3 total rows for {identifier}" + for col in TEST_DATA_WITH_NULL.keys(): + assert df.where(f"{col} is not null").count() == 2, f"Expected 2 non-null rows for {col}" + assert df.where(f"{col} is null").count() == 1, f"Expected 1 null row for {col} is null" + + assert tbl.inspect.partitions().num_rows == expected_rows + files_df = spark.sql( + f""" + SELECT * + FROM {identifier}.files + """ + ) + assert files_df.count() == expected_rows + + +@pytest.mark.integration +@pytest.mark.parametrize( + "transform,expected_partitions", + [ + pytest.param(YearTransform(), {53, 54, None}, id="year_transform"), + pytest.param(MonthTransform(), {647, 648, 649, None}, id="month_transform"), + pytest.param( + DayTransform(), {date(2023, 12, 31), date(2024, 1, 1), date(2024, 1, 31), date(2024, 2, 1), None}, id="day_transform" + ), + pytest.param(HourTransform(), {473328, 473352, 474072, 474096, 474102, None}, id="hour_transform"), + ], +) +@pytest.mark.parametrize("format_version", [1, 2]) +def test_append_transform_partition_verify_partitions_count( + session_catalog: Catalog, + spark: SparkSession, + arrow_table_date_timestamps: pa.Table, + arrow_table_date_timestamps_schema: Schema, + transform: Transform[Any, Any], + expected_partitions: Set[Any], + format_version: int, +) -> None: + # Given + part_col = "timestamptz" + identifier = f"default.arrow_table_v{format_version}_with_{str(transform)}_transform_partitioned_on_col_{part_col}" + nested_field 
= arrow_table_date_timestamps_schema.find_field(part_col) + partition_spec = PartitionSpec( + PartitionField(source_id=nested_field.field_id, field_id=1001, transform=transform, name=part_col), + ) + + # When + tbl = _create_table( + session_catalog=session_catalog, + identifier=identifier, + properties={"format-version": str(format_version)}, + data=[arrow_table_date_timestamps], + partition_spec=partition_spec, + schema=arrow_table_date_timestamps_schema, + ) + + # Then + assert tbl.format_version == format_version, f"Expected v{format_version}, got: v{tbl.format_version}" + df = spark.table(identifier) + assert df.count() == 6, f"Expected 6 total rows for {identifier}" + for col in arrow_table_date_timestamps.column_names: + assert df.where(f"{col} is not null").count() == 5, f"Expected 5 non-null rows for {col}" + assert df.where(f"{col} is null").count() == 1, f"Expected 1 null row for {col} is null" + + partitions_table = tbl.inspect.partitions() + assert partitions_table.num_rows == len(expected_partitions) + assert {part[part_col] for part in partitions_table["partition"].to_pylist()} == expected_partitions + files_df = spark.sql( + f""" + SELECT * + FROM {identifier}.files + """ + ) + assert files_df.count() == len(expected_partitions) + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_append_multiple_partitions( + session_catalog: Catalog, + spark: SparkSession, + arrow_table_date_timestamps: pa.Table, + arrow_table_date_timestamps_schema: Schema, + format_version: int, +) -> None: + # Given + identifier = f"default.arrow_table_v{format_version}_with_multiple_partitions" + partition_spec = PartitionSpec( + PartitionField( + source_id=arrow_table_date_timestamps_schema.find_field("date").field_id, + field_id=1001, + transform=YearTransform(), + name="date_year", + ), + PartitionField( + source_id=arrow_table_date_timestamps_schema.find_field("timestamptz").field_id, + field_id=1000, + transform=HourTransform(), + name="timestamptz_hour", + ), + ) + + # When + tbl = _create_table( + session_catalog=session_catalog, + identifier=identifier, + properties={"format-version": str(format_version)}, + data=[arrow_table_date_timestamps], + partition_spec=partition_spec, + schema=arrow_table_date_timestamps_schema, + ) + + # Then + assert tbl.format_version == format_version, f"Expected v{format_version}, got: v{tbl.format_version}" + df = spark.table(identifier) + assert df.count() == 6, f"Expected 6 total rows for {identifier}" + for col in arrow_table_date_timestamps.column_names: + assert df.where(f"{col} is not null").count() == 5, f"Expected 5 non-null rows for {col}" + assert df.where(f"{col} is null").count() == 1, f"Expected 1 null row for {col} is null" + + partitions_table = tbl.inspect.partitions() + assert partitions_table.num_rows == 6 + partitions = partitions_table["partition"].to_pylist() + assert {(part["date_year"], part["timestamptz_hour"]) for part in partitions} == { + (53, 473328), + (54, 473352), + (54, 474072), + (54, 474096), + (54, 474102), + (None, None), + } + files_df = spark.sql( + f""" + SELECT * + FROM {identifier}.files + """ + ) + assert files_df.count() == 6 diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 8bebc53d92..e329adcd5c 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -34,6 +34,7 @@ from pyiceberg.catalog import Catalog from pyiceberg.catalog.hive import HiveCatalog +from
pyiceberg.catalog.rest import RestCatalog from pyiceberg.catalog.sql import SqlCatalog from pyiceberg.exceptions import NoSuchTableError from pyiceberg.partitioning import PartitionField, PartitionSpec @@ -186,47 +187,47 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi ).collect() operations = [row.operation for row in rows] - assert operations == ['append', 'append', 'overwrite'] + assert operations == ["append", "append", "overwrite"] summaries = [row.summary for row in rows] assert summaries[0] == { - 'added-data-files': '1', - 'added-files-size': '5459', - 'added-records': '3', - 'total-data-files': '1', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '5459', - 'total-position-deletes': '0', - 'total-records': '3', + "added-data-files": "1", + "added-files-size": "5459", + "added-records": "3", + "total-data-files": "1", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "5459", + "total-position-deletes": "0", + "total-records": "3", } assert summaries[1] == { - 'added-data-files': '1', - 'added-files-size': '5459', - 'added-records': '3', - 'total-data-files': '2', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '10918', - 'total-position-deletes': '0', - 'total-records': '6', + "added-data-files": "1", + "added-files-size": "5459", + "added-records": "3", + "total-data-files": "2", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "10918", + "total-position-deletes": "0", + "total-records": "6", } assert summaries[2] == { - 'added-data-files': '1', - 'added-files-size': '5459', - 'added-records': '3', - 'deleted-data-files': '2', - 'deleted-records': '6', - 'removed-files-size': '10918', - 'total-data-files': '1', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '5459', - 'total-position-deletes': '0', - 'total-records': '3', + "added-data-files": "1", + "added-files-size": "5459", + "added-records": "3", + "deleted-data-files": "2", + "deleted-records": "6", + "removed-files-size": "10918", + "total-data-files": "1", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "5459", + "total-position-deletes": "0", + "total-records": "3", } @@ -283,25 +284,25 @@ def test_python_writes_special_character_column_with_spark_reads( identifier = "default.python_writes_special_character_column_with_spark_reads" column_name_with_special_character = "letter/abc" TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN = { - column_name_with_special_character: ['a', None, 'z'], - 'id': [1, 2, 3], - 'name': ['AB', 'CD', 'EF'], - 'address': [ - {'street': '123', 'city': 'SFO', 'zip': 12345, column_name_with_special_character: 'a'}, - {'street': '456', 'city': 'SW', 'zip': 67890, column_name_with_special_character: 'b'}, - {'street': '789', 'city': 'Random', 'zip': 10112, column_name_with_special_character: 'c'}, + column_name_with_special_character: ["a", None, "z"], + "id": [1, 2, 3], + "name": ["AB", "CD", "EF"], + "address": [ + {"street": "123", "city": "SFO", "zip": 12345, column_name_with_special_character: "a"}, + {"street": "456", "city": "SW", "zip": 67890, column_name_with_special_character: "b"}, + {"street": "789", "city": "Random", "zip": 10112, column_name_with_special_character: "c"}, ], } pa_schema = pa.schema([ pa.field(column_name_with_special_character, pa.string()), - pa.field('id', pa.int32()), - pa.field('name', pa.string()), + pa.field("id", pa.int32()), + 
pa.field("name", pa.string()), pa.field( - 'address', + "address", pa.struct([ - pa.field('street', pa.string()), - pa.field('city', pa.string()), - pa.field('zip', pa.int32()), + pa.field("street", pa.string()), + pa.field("city", pa.string()), + pa.field("zip", pa.int32()), pa.field(column_name_with_special_character, pa.string()), ]), ), @@ -315,6 +316,30 @@ def test_python_writes_special_character_column_with_spark_reads( assert spark_df.equals(pyiceberg_df) +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_python_writes_dictionary_encoded_column_with_spark_reads( + spark: SparkSession, session_catalog: Catalog, format_version: int +) -> None: + identifier = "default.python_writes_dictionary_encoded_column_with_spark_reads" + TEST_DATA = { + "id": [1, 2, 3, 1, 1], + "name": ["AB", "CD", "EF", "CD", "EF"], + } + pa_schema = pa.schema([ + pa.field("id", pa.dictionary(pa.int32(), pa.int32(), False)), + pa.field("name", pa.dictionary(pa.int32(), pa.string(), False)), + ]) + arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema) + + tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, schema=pa_schema) + + tbl.overwrite(arrow_table) + spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas() + pyiceberg_df = tbl.scan().to_pandas() + assert spark_df.equals(pyiceberg_df) + + @pytest.mark.integration def test_write_bin_pack_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.write_bin_pack_data_files" @@ -449,7 +474,7 @@ def test_write_parquet_unsupported_properties( @pytest.mark.integration def test_invalid_arguments(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: identifier = "default.arrow_data_files" - tbl = _create_table(session_catalog, identifier, {'format-version': '1'}, []) + tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, []) with pytest.raises(ValueError, match="Expected PyArrow table, got: not a df"): tbl.overwrite("not a df") @@ -464,7 +489,7 @@ def test_summaries_with_only_nulls( ) -> None: identifier = "default.arrow_table_summaries_with_only_nulls" tbl = _create_table( - session_catalog, identifier, {'format-version': '1'}, [arrow_table_without_data, arrow_table_with_only_nulls] + session_catalog, identifier, {"format-version": "1"}, [arrow_table_without_data, arrow_table_with_only_nulls] ) tbl.overwrite(arrow_table_without_data) @@ -477,49 +502,49 @@ def test_summaries_with_only_nulls( ).collect() operations = [row.operation for row in rows] - assert operations == ['append', 'append', 'overwrite'] + assert operations == ["append", "append", "overwrite"] summaries = [row.summary for row in rows] assert summaries[0] == { - 'total-data-files': '0', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '0', - 'total-position-deletes': '0', - 'total-records': '0', + "total-data-files": "0", + "total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "0", + "total-position-deletes": "0", + "total-records": "0", } assert summaries[1] == { - 'added-data-files': '1', - 'added-files-size': '4239', - 'added-records': '2', - 'total-data-files': '1', - 'total-delete-files': '0', - 'total-equality-deletes': '0', - 'total-files-size': '4239', - 'total-position-deletes': '0', - 'total-records': '2', + "added-data-files": "1", + "added-files-size": "4239", + "added-records": "2", + "total-data-files": "1", + 
"total-delete-files": "0", + "total-equality-deletes": "0", + "total-files-size": "4239", + "total-position-deletes": "0", + "total-records": "2", } assert summaries[2] == { - 'removed-files-size': '4239', - 'total-equality-deletes': '0', - 'total-position-deletes': '0', - 'deleted-data-files': '1', - 'total-delete-files': '0', - 'total-files-size': '0', - 'deleted-records': '2', - 'total-data-files': '0', - 'total-records': '0', + "removed-files-size": "4239", + "total-equality-deletes": "0", + "total-position-deletes": "0", + "deleted-data-files": "1", + "total-delete-files": "0", + "total-files-size": "0", + "deleted-records": "2", + "total-data-files": "0", + "total-records": "0", } @pytest.mark.integration def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> None: - os.environ['TZ'] = 'Etc/UTC' + os.environ["TZ"] = "Etc/UTC" time.tzset() - tz = pytz.timezone(os.environ['TZ']) + tz = pytz.timezone(os.environ["TZ"]) catalog = SqlCatalog("test_sql_catalog", uri="sqlite:///:memory:", warehouse=f"/{warehouse}") catalog.create_namespace("default") @@ -530,7 +555,7 @@ def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> import duckdb - duckdb.sql('INSTALL iceberg; LOAD iceberg;') + duckdb.sql("INSTALL iceberg; LOAD iceberg;") result = duckdb.sql( f""" SELECT * @@ -541,8 +566,8 @@ def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> assert result == [ ( False, - 'a', - 'aaaaaaaaaaaaaaaaaaaaaa', + "a", + "aaaaaaaaaaaaaaaaaaaaaa", 1, 1, 0.0, @@ -550,14 +575,14 @@ def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> datetime(2023, 1, 1, 19, 25), datetime(2023, 1, 1, 19, 25, tzinfo=tz), date(2023, 1, 1), - b'\x01', - b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00', + b"\x01", + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", ), (None, None, None, None, None, None, None, None, None, None, None, None), ( True, - 'z', - 'zzzzzzzzzzzzzzzzzzzzzz', + "z", + "zzzzzzzzzzzzzzzzzzzzzz", 9, 9, 0.8999999761581421, @@ -565,8 +590,8 @@ def test_duckdb_url_import(warehouse: Path, arrow_table_with_null: pa.Table) -> datetime(2023, 3, 1, 19, 25), datetime(2023, 3, 1, 19, 25, tzinfo=tz), date(2023, 3, 1), - b'\x12', - b'\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11', + b"\x12", + b"\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11\x11", ), ] @@ -583,7 +608,7 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None pa_table = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], + "foo": ["a", None, "z"], }, schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), ) @@ -594,8 +619,8 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None pa_table_with_column = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], - 'bar': [19, None, 25], + "foo": ["a", None, "z"], + "bar": [19, None, 25], }, schema=pa.schema([ pa.field("foo", pa.string(), nullable=True), @@ -613,31 +638,32 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None @pytest.mark.integration -@pytest.mark.parametrize("format_version", [2]) -def test_create_table_transaction(session_catalog: Catalog, format_version: int) -> None: - if format_version == 1: +@pytest.mark.parametrize("format_version", [1, 2]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) +def test_create_table_transaction(catalog: Catalog, format_version: 
int) -> None: + if format_version == 1 and isinstance(catalog, RestCatalog): pytest.skip( "There is a bug in the REST catalog (maybe server side) that prevents create and commit a staged version 1 table" ) - identifier = f"default.arrow_create_table_transaction{format_version}" + identifier = f"default.arrow_create_table_transaction_{catalog.name}_{format_version}" try: - session_catalog.drop_table(identifier=identifier) + catalog.drop_table(identifier=identifier) except NoSuchTableError: pass pa_table = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], + "foo": ["a", None, "z"], }, schema=pa.schema([pa.field("foo", pa.string(), nullable=True)]), ) pa_table_with_column = pa.Table.from_pydict( { - 'foo': ['a', None, 'z'], - 'bar': [19, None, 25], + "foo": ["a", None, "z"], + "bar": [19, None, 25], }, schema=pa.schema([ pa.field("foo", pa.string(), nullable=True), @@ -645,7 +671,7 @@ def test_create_table_transaction(session_catalog: Catalog, format_version: int) ]), ) - with session_catalog.create_table_transaction( + with catalog.create_table_transaction( identifier=identifier, schema=pa_table.schema, properties={"format-version": str(format_version)} ) as txn: with txn.update_snapshot().fast_append() as snapshot_update: @@ -661,7 +687,7 @@ def test_create_table_transaction(session_catalog: Catalog, format_version: int) ): snapshot_update.append_data_file(data_file) - tbl = session_catalog.load_table(identifier=identifier) + tbl = catalog.load_table(identifier=identifier) assert tbl.format_version == format_version assert len(tbl.scan().to_arrow()) == 6 @@ -717,45 +743,45 @@ def test_inspect_snapshots( df = tbl.inspect.snapshots() assert df.column_names == [ - 'committed_at', - 'snapshot_id', - 'parent_id', - 'operation', - 'manifest_list', - 'summary', + "committed_at", + "snapshot_id", + "parent_id", + "operation", + "manifest_list", + "summary", ] - for committed_at in df['committed_at']: + for committed_at in df["committed_at"]: assert isinstance(committed_at.as_py(), datetime) - for snapshot_id in df['snapshot_id']: + for snapshot_id in df["snapshot_id"]: assert isinstance(snapshot_id.as_py(), int) - assert df['parent_id'][0].as_py() is None - assert df['parent_id'][1:] == df['snapshot_id'][:2] + assert df["parent_id"][0].as_py() is None + assert df["parent_id"][1:] == df["snapshot_id"][:2] - assert [operation.as_py() for operation in df['operation']] == ['append', 'overwrite', 'append'] + assert [operation.as_py() for operation in df["operation"]] == ["append", "overwrite", "append"] - for manifest_list in df['manifest_list']: + for manifest_list in df["manifest_list"]: assert manifest_list.as_py().startswith("s3://") - assert df['summary'][0].as_py() == [ - ('added-files-size', '5459'), - ('added-data-files', '1'), - ('added-records', '3'), - ('total-data-files', '1'), - ('total-delete-files', '0'), - ('total-records', '3'), - ('total-files-size', '5459'), - ('total-position-deletes', '0'), - ('total-equality-deletes', '0'), + assert df["summary"][0].as_py() == [ + ("added-files-size", "5459"), + ("added-data-files", "1"), + ("added-records", "3"), + ("total-data-files", "1"), + ("total-delete-files", "0"), + ("total-records", "3"), + ("total-files-size", "5459"), + ("total-position-deletes", "0"), + ("total-equality-deletes", "0"), ] lhs = spark.table(f"{identifier}.snapshots").toPandas() rhs = df.to_pandas() for column in df.column_names: for left, right in zip(lhs[column].to_list(), rhs[column].to_list()): - if column == 'summary': + if column == "summary": # Arrow returns a 
list of tuples, instead of a dict right = dict(right) @@ -814,7 +840,7 @@ def test_hive_catalog_storage_descriptor( @pytest.mark.integration -@pytest.mark.parametrize('catalog', [pytest.lazy_fixture('session_catalog_hive'), pytest.lazy_fixture('session_catalog')]) +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_sanitize_character_partitioned(catalog: Catalog) -> None: table_name = "default.test_table_partitioned_sanitized_character" try: @@ -833,3 +859,16 @@ def test_sanitize_character_partitioned(catalog: Catalog) -> None: ) assert len(tbl.scan().to_arrow()) == 22 + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_table_write_subset_of_schema(session_catalog: Catalog, arrow_table_with_null: pa.Table, format_version: int) -> None: + identifier = "default.table_append_subset_of_schema" + tbl = _create_table(session_catalog, identifier, {"format-version": format_version}, [arrow_table_with_null]) + arrow_table_without_some_columns = arrow_table_with_null.combine_chunks().drop(arrow_table_with_null.column_names[0]) + assert len(arrow_table_without_some_columns.columns) < len(arrow_table_with_null.columns) + tbl.overwrite(arrow_table_without_some_columns) + tbl.append(arrow_table_without_some_columns) + # overwrite and then append should produce twice the data + assert len(tbl.scan().to_arrow()) == len(arrow_table_without_some_columns) * 2 diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py index 90f5b08bf0..ec511f959d 100644 --- a/tests/io/test_pyarrow.py +++ b/tests/io/test_pyarrow.py @@ -1215,13 +1215,13 @@ def test_projection_list_of_structs(schema_list_of_structs: Schema, file_list_of results = [row.as_py() for row in result_table.columns[0]] assert results == [ [ - {'latitude': 52.371807, 'longitude': 4.896029, 'altitude': None}, - {'latitude': 52.387386, 'longitude': 4.646219, 'altitude': None}, + {"latitude": 52.371807, "longitude": 4.896029, "altitude": None}, + {"latitude": 52.387386, "longitude": 4.646219, "altitude": None}, ], [], [ - {'latitude': 52.078663, 'longitude': 4.288788, 'altitude': None}, - {'latitude': 52.387386, 'longitude': 4.646219, 'altitude': None}, + {"latitude": 52.078663, "longitude": 4.288788, "altitude": None}, + {"latitude": 52.387386, "longitude": 4.646219, "altitude": None}, ], ] assert ( diff --git a/tests/io/test_pyarrow_visitor.py b/tests/io/test_pyarrow_visitor.py index 5b55bd61b6..c8571dacf1 100644 --- a/tests/io/test_pyarrow_visitor.py +++ b/tests/io/test_pyarrow_visitor.py @@ -39,6 +39,7 @@ DoubleType, FixedType, FloatType, + IcebergType, IntegerType, ListType, LongType, @@ -280,6 +281,19 @@ def test_pyarrow_map_to_iceberg() -> None: assert visit_pyarrow(pyarrow_map, _ConvertToIceberg()) == expected +@pytest.mark.parametrize( + "value_type, expected_result", + [ + (pa.string(), StringType()), + (pa.int32(), IntegerType()), + (pa.float64(), DoubleType()), + ], +) +def test_pyarrow_dictionary_encoded_type_to_iceberg(value_type: pa.DataType, expected_result: IcebergType) -> None: + pyarrow_dict = pa.dictionary(pa.int32(), value_type) + assert visit_pyarrow(pyarrow_dict, _ConvertToIceberg()) == expected_result + + def test_round_schema_conversion_simple(table_schema_simple: Schema) -> None: actual = str(pyarrow_to_schema(schema_to_pyarrow(table_schema_simple))) expected = """table { @@ -315,7 +329,7 @@ def test_round_schema_large_string() -> None: def test_simple_schema_has_missing_ids() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), 
nullable=False), + pa.field("foo", pa.string(), nullable=False), ]) visitor = _HasIds() has_ids = visit_pyarrow(schema, visitor) @@ -324,8 +338,8 @@ def test_simple_schema_has_missing_ids() -> None: def test_simple_schema_has_missing_ids_partial() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), - pa.field('bar', pa.int32(), nullable=False), + pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field("bar", pa.int32(), nullable=False), ]) visitor = _HasIds() has_ids = visit_pyarrow(schema, visitor) @@ -334,9 +348,9 @@ def test_simple_schema_has_missing_ids_partial() -> None: def test_nested_schema_has_missing_ids() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False), + pa.field("foo", pa.string(), nullable=False), pa.field( - 'quux', + "quux", pa.map_( pa.string(), pa.map_(pa.string(), pa.int32()), @@ -351,16 +365,16 @@ def test_nested_schema_has_missing_ids() -> None: def test_nested_schema_has_ids() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), pa.field( - 'quux', + "quux", pa.map_( pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}), pa.field( "value", pa.map_( - pa.field('key', pa.string(), nullable=False, metadata={"PARQUET:field_id": "9"}), - pa.field('value', pa.int32(), metadata={"PARQUET:field_id": "10"}), + pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "9"}), + pa.field("value", pa.int32(), metadata={"PARQUET:field_id": "10"}), ), nullable=False, metadata={"PARQUET:field_id": "8"}, @@ -377,14 +391,14 @@ def test_nested_schema_has_ids() -> None: def test_nested_schema_has_partial_missing_ids() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), + pa.field("foo", pa.string(), nullable=False, metadata={"PARQUET:field_id": "1", "doc": "foo doc"}), pa.field( - 'quux', + "quux", pa.map_( pa.field("key", pa.string(), nullable=False, metadata={"PARQUET:field_id": "7"}), pa.field( "value", - pa.map_(pa.field('key', pa.string(), nullable=False), pa.field('value', pa.int32())), + pa.map_(pa.field("key", pa.string(), nullable=False), pa.field("value", pa.int32())), nullable=False, ), ), @@ -412,9 +426,9 @@ def test_simple_pyarrow_schema_to_schema_missing_ids_using_name_mapping( ) -> None: schema = pyarrow_schema_simple_without_ids name_mapping = NameMapping([ - MappedField(field_id=1, names=['foo']), - MappedField(field_id=2, names=['bar']), - MappedField(field_id=3, names=['baz']), + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), ]) assert pyarrow_to_schema(schema, name_mapping) == iceberg_schema_simple @@ -425,7 +439,7 @@ def test_simple_pyarrow_schema_to_schema_missing_ids_using_name_mapping_partial_ ) -> None: schema = pyarrow_schema_simple_without_ids name_mapping = NameMapping([ - MappedField(field_id=1, names=['foo']), + MappedField(field_id=1, names=["foo"]), ]) with pytest.raises(ValueError) as exc_info: _ = pyarrow_to_schema(schema, name_mapping) @@ -438,45 +452,45 @@ def test_nested_pyarrow_schema_to_schema_missing_ids_using_name_mapping( schema = pyarrow_schema_nested_without_ids name_mapping = NameMapping([ - 
MappedField(field_id=1, names=['foo']), - MappedField(field_id=2, names=['bar']), - MappedField(field_id=3, names=['baz']), - MappedField(field_id=4, names=['qux'], fields=[MappedField(field_id=5, names=['element'])]), + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], + names=["value"], fields=[ - MappedField(field_id=9, names=['key']), - MappedField(field_id=10, names=['value']), + MappedField(field_id=9, names=["key"]), + MappedField(field_id=10, names=["value"]), ], ), ], ), MappedField( field_id=11, - names=['location'], + names=["location"], fields=[ MappedField( field_id=12, - names=['element'], + names=["element"], fields=[ - MappedField(field_id=13, names=['latitude']), - MappedField(field_id=14, names=['longitude']), + MappedField(field_id=13, names=["latitude"]), + MappedField(field_id=14, names=["longitude"]), ], ) ], ), MappedField( field_id=15, - names=['person'], + names=["person"], fields=[ - MappedField(field_id=16, names=['name']), - MappedField(field_id=17, names=['age']), + MappedField(field_id=16, names=["name"]), + MappedField(field_id=17, names=["age"]), ], ), ]) @@ -486,9 +500,9 @@ def test_nested_pyarrow_schema_to_schema_missing_ids_using_name_mapping( def test_pyarrow_schema_to_schema_missing_ids_using_name_mapping_nested_missing_id() -> None: schema = pa.schema([ - pa.field('foo', pa.string(), nullable=False), + pa.field("foo", pa.string(), nullable=False), pa.field( - 'quux', + "quux", pa.map_( pa.string(), pa.map_(pa.string(), pa.int32()), @@ -498,17 +512,17 @@ def test_pyarrow_schema_to_schema_missing_ids_using_name_mapping_nested_missing_ ]) name_mapping = NameMapping([ - MappedField(field_id=1, names=['foo']), + MappedField(field_id=1, names=["foo"]), MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], + names=["value"], fields=[ - MappedField(field_id=10, names=['value']), + MappedField(field_id=10, names=["value"]), ], ), ], diff --git a/tests/table/test_init.py b/tests/table/test_init.py index 2bc78f3197..11d50db8a5 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -995,9 +995,9 @@ def test_correct_schema() -> None: # Should use the current schema, instead the one from the snapshot projection_schema = t.scan().projection() assert projection_schema == Schema( - NestedField(field_id=1, name='x', field_type=LongType(), required=True), - NestedField(field_id=2, name='y', field_type=LongType(), required=True), - NestedField(field_id=3, name='z', field_type=LongType(), required=True), + NestedField(field_id=1, name="x", field_type=LongType(), required=True), + NestedField(field_id=2, name="y", field_type=LongType(), required=True), + NestedField(field_id=3, name="z", field_type=LongType(), required=True), identifier_field_ids=[1, 2], ) assert projection_schema.schema_id == 1 @@ -1005,7 +1005,7 @@ def test_correct_schema() -> None: # When we explicitly filter on the commit, we want to have the schema that's linked to the snapshot projection_schema = t.scan(snapshot_id=123).projection() assert projection_schema == Schema( - 
NestedField(field_id=1, name='x', field_type=LongType(), required=True), + NestedField(field_id=1, name="x", field_type=LongType(), required=True), identifier_field_ids=[], ) assert projection_schema.schema_id == 0 @@ -1138,8 +1138,8 @@ def test_table_properties_raise_for_none_value(example_table_metadata_v2: Dict[s def test_serialize_commit_table_request() -> None: request = CommitTableRequest( - requirements=(AssertTableUUID(uuid='4bfd18a3-74c6-478e-98b1-71c4c32f4163'),), - identifier=TableIdentifier(namespace=['a'], name='b'), + requirements=(AssertTableUUID(uuid="4bfd18a3-74c6-478e-98b1-71c4c32f4163"),), + identifier=TableIdentifier(namespace=["a"], name="b"), ) deserialized_request = CommitTableRequest.model_validate_json(request.model_dump_json()) @@ -1149,17 +1149,17 @@ def test_serialize_commit_table_request() -> None: def test_partition_for_demo() -> None: import pyarrow as pa - test_pa_schema = pa.schema([('year', pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())]) + test_pa_schema = pa.schema([("year", pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())]) test_schema = Schema( - NestedField(field_id=1, name='year', field_type=StringType(), required=False), - NestedField(field_id=2, name='n_legs', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='animal', field_type=StringType(), required=False), + NestedField(field_id=1, name="year", field_type=StringType(), required=False), + NestedField(field_id=2, name="n_legs", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="animal", field_type=StringType(), required=False), schema_id=1, ) test_data = { - 'year': [2020, 2022, 2022, 2022, 2021, 2022, 2022, 2019, 2021], - 'n_legs': [2, 2, 2, 4, 4, 4, 4, 5, 100], - 'animal': ["Flamingo", "Parrot", "Parrot", "Horse", "Dog", "Horse", "Horse", "Brittle stars", "Centipede"], + "year": [2020, 2022, 2022, 2022, 2021, 2022, 2022, 2019, 2021], + "n_legs": [2, 2, 2, 4, 4, 4, 4, 5, 100], + "animal": ["Flamingo", "Parrot", "Parrot", "Horse", "Dog", "Horse", "Horse", "Brittle stars", "Centipede"], } arrow_table = pa.Table.from_pydict(test_data, schema=test_pa_schema) partition_spec = PartitionSpec( @@ -1183,11 +1183,11 @@ def test_partition_for_demo() -> None: def test_identity_partition_on_multi_columns() -> None: import pyarrow as pa - test_pa_schema = pa.schema([('born_year', pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())]) + test_pa_schema = pa.schema([("born_year", pa.int64()), ("n_legs", pa.int64()), ("animal", pa.string())]) test_schema = Schema( - NestedField(field_id=1, name='born_year', field_type=StringType(), required=False), - NestedField(field_id=2, name='n_legs', field_type=IntegerType(), required=True), - NestedField(field_id=3, name='animal', field_type=StringType(), required=False), + NestedField(field_id=1, name="born_year", field_type=StringType(), required=False), + NestedField(field_id=2, name="n_legs", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="animal", field_type=StringType(), required=False), schema_id=1, ) # 5 partitions, 6 unique row values, 12 rows @@ -1210,9 +1210,9 @@ def test_identity_partition_on_multi_columns() -> None: for _ in range(1000): random.shuffle(test_rows) test_data = { - 'born_year': [row[0] for row in test_rows], - 'n_legs': [row[1] for row in test_rows], - 'animal': [row[2] for row in test_rows], + "born_year": [row[0] for row in test_rows], + "n_legs": [row[1] for row in test_rows], + "animal": [row[2] for row in test_rows], } arrow_table = 
pa.Table.from_pydict(test_data, schema=test_pa_schema) @@ -1222,7 +1222,7 @@ def test_identity_partition_on_multi_columns() -> None: concatenated_arrow_table = pa.concat_tables([table_partition.arrow_table_partition for table_partition in result]) assert concatenated_arrow_table.num_rows == arrow_table.num_rows assert concatenated_arrow_table.sort_by([ - ('born_year', 'ascending'), - ('n_legs', 'ascending'), - ('animal', 'ascending'), - ]) == arrow_table.sort_by([('born_year', 'ascending'), ('n_legs', 'ascending'), ('animal', 'ascending')]) + ("born_year", "ascending"), + ("n_legs", "ascending"), + ("animal", "ascending"), + ]) == arrow_table.sort_by([("born_year", "ascending"), ("n_legs", "ascending"), ("animal", "ascending")]) diff --git a/tests/table/test_metadata.py b/tests/table/test_metadata.py index b4e30a6b84..0e2b91f24b 100644 --- a/tests/table/test_metadata.py +++ b/tests/table/test_metadata.py @@ -220,7 +220,7 @@ def test_new_table_metadata_with_explicit_v1_format() -> None: partition_spec=partition_spec, sort_order=sort_order, location="s3://some_v1_location/", - properties={'format-version': "1"}, + properties={"format-version": "1"}, ) expected_schema = Schema( diff --git a/tests/table/test_name_mapping.py b/tests/table/test_name_mapping.py index e039415ce3..d4a2bf6c41 100644 --- a/tests/table/test_name_mapping.py +++ b/tests/table/test_name_mapping.py @@ -30,45 +30,45 @@ @pytest.fixture(scope="session") def table_name_mapping_nested() -> NameMapping: return NameMapping([ - MappedField(field_id=1, names=['foo']), - MappedField(field_id=2, names=['bar']), - MappedField(field_id=3, names=['baz']), - MappedField(field_id=4, names=['qux'], fields=[MappedField(field_id=5, names=['element'])]), + MappedField(field_id=1, names=["foo"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], + names=["value"], fields=[ - MappedField(field_id=9, names=['key']), - MappedField(field_id=10, names=['value']), + MappedField(field_id=9, names=["key"]), + MappedField(field_id=10, names=["value"]), ], ), ], ), MappedField( field_id=11, - names=['location'], + names=["location"], fields=[ MappedField( field_id=12, - names=['element'], + names=["element"], fields=[ - MappedField(field_id=13, names=['latitude']), - MappedField(field_id=14, names=['longitude']), + MappedField(field_id=13, names=["latitude"]), + MappedField(field_id=14, names=["longitude"]), ], ) ], ), MappedField( field_id=15, - names=['person'], + names=["person"], fields=[ - MappedField(field_id=16, names=['name']), - MappedField(field_id=17, names=['age']), + MappedField(field_id=16, names=["name"]), + MappedField(field_id=17, names=["age"]), ], ), ]) @@ -80,7 +80,7 @@ def test_json_mapped_field_deserialization() -> None: "names": ["id", "record_id"] } """ - assert MappedField(field_id=1, names=['id', 'record_id']) == MappedField.model_validate_json(mapped_field) + assert MappedField(field_id=1, names=["id", "record_id"]) == MappedField.model_validate_json(mapped_field) mapped_field_with_null_fields = """{ "field-id": 1, @@ -88,7 +88,7 @@ def test_json_mapped_field_deserialization() -> None: "fields": null } """ - assert MappedField(field_id=1, names=['id', 'record_id']) == 
MappedField.model_validate_json(mapped_field_with_null_fields) + assert MappedField(field_id=1, names=["id", "record_id"]) == MappedField.model_validate_json(mapped_field_with_null_fields) def test_json_name_mapping_deserialization() -> None: @@ -133,14 +133,14 @@ def test_json_name_mapping_deserialization() -> None: """ assert parse_mapping_from_json(name_mapping) == NameMapping([ - MappedField(field_id=1, names=['id', 'record_id']), - MappedField(field_id=2, names=['data']), + MappedField(field_id=1, names=["id", "record_id"]), + MappedField(field_id=2, names=["data"]), MappedField( - names=['location'], + names=["location"], field_id=3, fields=[ - MappedField(field_id=4, names=['latitude', 'lat']), - MappedField(field_id=5, names=['longitude', 'long']), + MappedField(field_id=4, names=["latitude", "lat"]), + MappedField(field_id=5, names=["longitude", "long"]), ], ), ]) @@ -155,14 +155,14 @@ def test_json_serialization(table_name_mapping_nested: NameMapping) -> None: def test_name_mapping_to_string() -> None: nm = NameMapping([ - MappedField(field_id=1, names=['id', 'record_id']), - MappedField(field_id=2, names=['data']), + MappedField(field_id=1, names=["id", "record_id"]), + MappedField(field_id=2, names=["data"]), MappedField( - names=['location'], + names=["location"], field_id=3, fields=[ - MappedField(field_id=4, names=['lat', 'latitude']), - MappedField(field_id=5, names=['long', 'longitude']), + MappedField(field_id=4, names=["lat", "latitude"]), + MappedField(field_id=5, names=["long", "longitude"]), ], ), ]) @@ -184,64 +184,64 @@ def test_mapping_from_schema(table_schema_nested: Schema, table_name_mapping_nes def test_mapping_by_name(table_name_mapping_nested: NameMapping) -> None: assert table_name_mapping_nested._field_by_name == { - 'person.age': MappedField(field_id=17, names=['age']), - 'person.name': MappedField(field_id=16, names=['name']), - 'person': MappedField( + "person.age": MappedField(field_id=17, names=["age"]), + "person.name": MappedField(field_id=16, names=["name"]), + "person": MappedField( field_id=15, - names=['person'], - fields=[MappedField(field_id=16, names=['name']), MappedField(field_id=17, names=['age'])], + names=["person"], + fields=[MappedField(field_id=16, names=["name"]), MappedField(field_id=17, names=["age"])], ), - 'location.element.longitude': MappedField(field_id=14, names=['longitude']), - 'location.element.latitude': MappedField(field_id=13, names=['latitude']), - 'location.element': MappedField( + "location.element.longitude": MappedField(field_id=14, names=["longitude"]), + "location.element.latitude": MappedField(field_id=13, names=["latitude"]), + "location.element": MappedField( field_id=12, - names=['element'], - fields=[MappedField(field_id=13, names=['latitude']), MappedField(field_id=14, names=['longitude'])], + names=["element"], + fields=[MappedField(field_id=13, names=["latitude"]), MappedField(field_id=14, names=["longitude"])], ), - 'location': MappedField( + "location": MappedField( field_id=11, - names=['location'], + names=["location"], fields=[ MappedField( field_id=12, - names=['element'], - fields=[MappedField(field_id=13, names=['latitude']), MappedField(field_id=14, names=['longitude'])], + names=["element"], + fields=[MappedField(field_id=13, names=["latitude"]), MappedField(field_id=14, names=["longitude"])], ) ], ), - 'quux.value.value': MappedField(field_id=10, names=['value']), - 'quux.value.key': MappedField(field_id=9, names=['key']), - 'quux.value': MappedField( + "quux.value.value": 
MappedField(field_id=10, names=["value"]), + "quux.value.key": MappedField(field_id=9, names=["key"]), + "quux.value": MappedField( field_id=8, - names=['value'], - fields=[MappedField(field_id=9, names=['key']), MappedField(field_id=10, names=['value'])], + names=["value"], + fields=[MappedField(field_id=9, names=["key"]), MappedField(field_id=10, names=["value"])], ), - 'quux.key': MappedField(field_id=7, names=['key']), - 'quux': MappedField( + "quux.key": MappedField(field_id=7, names=["key"]), + "quux": MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], - fields=[MappedField(field_id=9, names=['key']), MappedField(field_id=10, names=['value'])], + names=["value"], + fields=[MappedField(field_id=9, names=["key"]), MappedField(field_id=10, names=["value"])], ), ], ), - 'qux.element': MappedField(field_id=5, names=['element']), - 'qux': MappedField(field_id=4, names=['qux'], fields=[MappedField(field_id=5, names=['element'])]), - 'baz': MappedField(field_id=3, names=['baz']), - 'bar': MappedField(field_id=2, names=['bar']), - 'foo': MappedField(field_id=1, names=['foo']), + "qux.element": MappedField(field_id=5, names=["element"]), + "qux": MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), + "baz": MappedField(field_id=3, names=["baz"]), + "bar": MappedField(field_id=2, names=["bar"]), + "foo": MappedField(field_id=1, names=["foo"]), } def test_mapping_lookup_by_name(table_name_mapping_nested: NameMapping) -> None: - assert table_name_mapping_nested.find("foo") == MappedField(field_id=1, names=['foo']) - assert table_name_mapping_nested.find("location.element.latitude") == MappedField(field_id=13, names=['latitude']) - assert table_name_mapping_nested.find("location", "element", "latitude") == MappedField(field_id=13, names=['latitude']) - assert table_name_mapping_nested.find(*["location", "element", "latitude"]) == MappedField(field_id=13, names=['latitude']) + assert table_name_mapping_nested.find("foo") == MappedField(field_id=1, names=["foo"]) + assert table_name_mapping_nested.find("location.element.latitude") == MappedField(field_id=13, names=["latitude"]) + assert table_name_mapping_nested.find("location", "element", "latitude") == MappedField(field_id=13, names=["latitude"]) + assert table_name_mapping_nested.find(*["location", "element", "latitude"]) == MappedField(field_id=13, names=["latitude"]) with pytest.raises(ValueError, match="Could not find field with name: boom"): table_name_mapping_nested.find("boom") @@ -264,48 +264,48 @@ def test_update_mapping(table_name_mapping_nested: NameMapping) -> None: } expected = NameMapping([ - MappedField(field_id=1, names=['foo', 'foo_update']), - MappedField(field_id=2, names=['bar']), - MappedField(field_id=3, names=['baz']), - MappedField(field_id=4, names=['qux'], fields=[MappedField(field_id=5, names=['element'])]), + MappedField(field_id=1, names=["foo", "foo_update"]), + MappedField(field_id=2, names=["bar"]), + MappedField(field_id=3, names=["baz"]), + MappedField(field_id=4, names=["qux"], fields=[MappedField(field_id=5, names=["element"])]), MappedField( field_id=6, - names=['quux'], + names=["quux"], fields=[ - MappedField(field_id=7, names=['key']), + MappedField(field_id=7, names=["key"]), MappedField( field_id=8, - names=['value'], + names=["value"], fields=[ - MappedField(field_id=9, names=['key']), - MappedField(field_id=10, 
names=['value']), + MappedField(field_id=9, names=["key"]), + MappedField(field_id=10, names=["value"]), ], ), ], ), MappedField( field_id=11, - names=['location'], + names=["location"], fields=[ MappedField( field_id=12, - names=['element'], + names=["element"], fields=[ - MappedField(field_id=13, names=['latitude']), - MappedField(field_id=14, names=['longitude']), + MappedField(field_id=13, names=["latitude"]), + MappedField(field_id=14, names=["longitude"]), ], ) ], ), MappedField( field_id=15, - names=['person'], + names=["person"], fields=[ - MappedField(field_id=17, names=['age']), - MappedField(field_id=19, names=['name']), - MappedField(field_id=20, names=['add_20']), + MappedField(field_id=17, names=["age"]), + MappedField(field_id=19, names=["name"]), + MappedField(field_id=20, names=["add_20"]), ], ), - MappedField(field_id=18, names=['add_18']), + MappedField(field_id=18, names=["add_18"]), ]) assert update_mapping(table_name_mapping_nested, updates, adds) == expected diff --git a/tests/table/test_snapshots.py b/tests/table/test_snapshots.py index e85ecce506..2569a11dc2 100644 --- a/tests/table/test_snapshots.py +++ b/tests/table/test_snapshots.py @@ -156,9 +156,9 @@ def test_snapshot_summary_collector(table_schema_simple: Schema) -> None: ssc.add_file(data_file, schema=table_schema_simple) assert ssc.build() == { - 'added-data-files': '1', - 'added-files-size': '1234', - 'added-records': '100', + "added-data-files": "1", + "added-files-size": "1234", + "added-records": "100", } @@ -174,7 +174,7 @@ def test_snapshot_summary_collector_with_partition() -> None: NestedField(field_id=2, name="string_field", field_type=StringType(), required=False), NestedField(field_id=3, name="int_field", field_type=IntegerType(), required=False), ) - spec = PartitionSpec(PartitionField(source_id=3, field_id=1001, transform=IdentityTransform(), name='int_field')) + spec = PartitionSpec(PartitionField(source_id=3, field_id=1001, transform=IdentityTransform(), name="int_field")) data_file_1 = DataFile(content=DataFileContent.DATA, record_count=100, file_size_in_bytes=1234, partition=Record(int_field=1)) data_file_2 = DataFile(content=DataFileContent.DATA, record_count=200, file_size_in_bytes=4321, partition=Record(int_field=2)) # When @@ -184,13 +184,13 @@ def test_snapshot_summary_collector_with_partition() -> None: # Then assert ssc.build() == { - 'added-files-size': '1234', - 'removed-files-size': '5555', - 'added-data-files': '1', - 'deleted-data-files': '2', - 'added-records': '100', - 'deleted-records': '300', - 'changed-partition-count': '2', + "added-files-size": "1234", + "removed-files-size": "5555", + "added-data-files": "1", + "deleted-data-files": "2", + "added-records": "100", + "deleted-records": "300", + "changed-partition-count": "2", } # When @@ -198,15 +198,15 @@ def test_snapshot_summary_collector_with_partition() -> None: # Then assert ssc.build() == { - 'added-files-size': '1234', - 'removed-files-size': '5555', - 'added-data-files': '1', - 'deleted-data-files': '2', - 'added-records': '100', - 'deleted-records': '300', - 'changed-partition-count': '2', - 'partitions.int_field=1': 'added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100', - 'partitions.int_field=2': 'removed-files-size=4321,deleted-data-files=1,deleted-records=200', + "added-files-size": "1234", + "removed-files-size": "5555", + "added-data-files": "1", + "deleted-data-files": "2", + "added-records": "100", + "deleted-records": "300", + 
"changed-partition-count": "2", + "partitions.int_field=1": "added-files-size=1234,removed-files-size=1234,added-data-files=1,deleted-data-files=1,added-records=100,deleted-records=100", + "partitions.int_field=2": "removed-files-size=4321,deleted-data-files=1,deleted-records=200", } @@ -214,12 +214,12 @@ def test_merge_snapshot_summaries_empty() -> None: assert update_snapshot_summaries(Summary(Operation.APPEND)) == Summary( operation=Operation.APPEND, **{ - 'total-data-files': '0', - 'total-delete-files': '0', - 'total-records': '0', - 'total-files-size': '0', - 'total-position-deletes': '0', - 'total-equality-deletes': '0', + "total-data-files": "0", + "total-delete-files": "0", + "total-records": "0", + "total-files-size": "0", + "total-position-deletes": "0", + "total-equality-deletes": "0", }, ) @@ -229,12 +229,12 @@ def test_merge_snapshot_summaries_new_summary() -> None: summary=Summary( operation=Operation.APPEND, **{ - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', + "added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", }, ) ) @@ -242,18 +242,18 @@ def test_merge_snapshot_summaries_new_summary() -> None: expected = Summary( operation=Operation.APPEND, **{ - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', - 'total-data-files': '1', - 'total-delete-files': '2', - 'total-records': '6', - 'total-files-size': '4', - 'total-position-deletes': '5', - 'total-equality-deletes': '3', + "added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", + "total-data-files": "1", + "total-delete-files": "2", + "total-records": "6", + "total-files-size": "4", + "total-position-deletes": "5", + "total-equality-deletes": "3", }, ) @@ -265,44 +265,44 @@ def test_merge_snapshot_summaries_overwrite_summary() -> None: summary=Summary( operation=Operation.OVERWRITE, **{ - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', + "added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", }, ), previous_summary={ - 'total-data-files': '1', - 'total-delete-files': '1', - 'total-equality-deletes': '1', - 'total-files-size': '1', - 'total-position-deletes': '1', - 'total-records': '1', + "total-data-files": "1", + "total-delete-files": "1", + "total-equality-deletes": "1", + "total-files-size": "1", + "total-position-deletes": "1", + "total-records": "1", }, truncate_full_table=True, ) expected = { - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', - 'total-data-files': '1', - 'total-records': '6', - 'total-delete-files': '2', - 'total-equality-deletes': '3', - 'total-files-size': '4', - 'total-position-deletes': '5', - 'deleted-data-files': '1', - 'removed-delete-files': '1', - 'deleted-records': '1', - 'removed-files-size': '1', - 'removed-position-deletes': '1', - 'removed-equality-deletes': '1', + 
"added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", + "total-data-files": "1", + "total-records": "6", + "total-delete-files": "2", + "total-equality-deletes": "3", + "total-files-size": "4", + "total-position-deletes": "5", + "deleted-data-files": "1", + "removed-delete-files": "1", + "deleted-records": "1", + "removed-files-size": "1", + "removed-position-deletes": "1", + "removed-equality-deletes": "1", } assert actual.additional_properties == expected @@ -324,15 +324,15 @@ def test_invalid_type() -> None: summary=Summary( operation=Operation.OVERWRITE, **{ - 'added-data-files': '1', - 'added-delete-files': '2', - 'added-equality-deletes': '3', - 'added-files-size': '4', - 'added-position-deletes': '5', - 'added-records': '6', + "added-data-files": "1", + "added-delete-files": "2", + "added-equality-deletes": "3", + "added-files-size": "4", + "added-position-deletes": "5", + "added-records": "6", }, ), - previous_summary={'total-data-files': 'abc'}, # should be a number + previous_summary={"total-data-files": "abc"}, # should be a number truncate_full_table=True, ) diff --git a/tests/test_serializers.py b/tests/test_serializers.py index 140db02700..ad40ea08e0 100644 --- a/tests/test_serializers.py +++ b/tests/test_serializers.py @@ -44,7 +44,7 @@ def test_legacy_current_snapshot_id( ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=metadata_location), overwrite=True) with PyArrowFileIO().new_input(location=metadata_location).open() as input_stream: metadata_json_bytes = input_stream.read() - assert json.loads(metadata_json_bytes)['current-snapshot-id'] == -1 + assert json.loads(metadata_json_bytes)["current-snapshot-id"] == -1 backwards_compatible_static_table = StaticTable.from_metadata(metadata_location) assert backwards_compatible_static_table.metadata.current_snapshot_id is None assert backwards_compatible_static_table.metadata == static_table.metadata diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 4dc3d9819f..3a9ffd6009 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -17,7 +17,7 @@ # pylint: disable=eval-used,protected-access,redefined-outer-name from datetime import date from decimal import Decimal -from typing import Any, Callable, Optional +from typing import TYPE_CHECKING, Any, Callable, Optional from uuid import UUID import mmh3 as mmh3 @@ -69,6 +69,7 @@ TimestampLiteral, literal, ) +from pyiceberg.partitioning import _to_partition_representation from pyiceberg.schema import Accessor from pyiceberg.transforms import ( BucketTransform, @@ -111,6 +112,9 @@ timestamptz_to_micros, ) +if TYPE_CHECKING: + import pyarrow as pa + @pytest.mark.parametrize( "test_input,test_type,expected", @@ -1550,7 +1554,7 @@ def test_strict_bucket_bytes(bound_reference_binary: BoundReference[int]) -> Non def test_strict_bucket_uuid(bound_reference_uuid: BoundReference[int]) -> None: - value = literal(UUID('12345678123456781234567812345678')) + value = literal(UUID("12345678123456781234567812345678")) transform: Transform[Any, int] = BucketTransform(num_buckets=10) _test_projection( lhs=transform.strict_project(name="name", pred=BoundNotEqualTo(term=bound_reference_uuid, literal=value)), @@ -1575,14 +1579,14 @@ def test_strict_bucket_uuid(bound_reference_uuid: BoundReference[int]) -> None: _test_projection( lhs=transform.strict_project( name="name", - pred=BoundNotIn(term=bound_reference_uuid, literals={value, 
literal(UUID('12345678123456781234567812345679'))}), + pred=BoundNotIn(term=bound_reference_uuid, literals={value, literal(UUID("12345678123456781234567812345679"))}), ), rhs=NotIn(term=Reference("name"), literals={1, 4}), ) _test_projection( lhs=transform.strict_project( name="name", - pred=BoundIn(term=bound_reference_uuid, literals={value, literal(UUID('12345678123456781234567812345679'))}), + pred=BoundIn(term=bound_reference_uuid, literals={value, literal(UUID("12345678123456781234567812345679"))}), ), rhs=None, ) @@ -1808,3 +1812,31 @@ def test_strict_binary(bound_reference_binary: BoundReference[str]) -> None: _test_projection( lhs=transform.strict_project(name="name", pred=BoundIn(term=bound_reference_binary, literals=set_of_literals)), rhs=None ) + + +@pytest.mark.parametrize( + "transform", + [ + pytest.param(YearTransform(), id="year_transform"), + pytest.param(MonthTransform(), id="month_transform"), + pytest.param(DayTransform(), id="day_transform"), + pytest.param(HourTransform(), id="hour_transform"), + ], +) +@pytest.mark.parametrize( + "source_col, source_type", [("date", DateType()), ("timestamp", TimestampType()), ("timestamptz", TimestamptzType())] +) +def test_ymd_pyarrow_transforms( + arrow_table_date_timestamps: "pa.Table", + source_col: str, + source_type: PrimitiveType, + transform: Transform[Any, Any], +) -> None: + if transform.can_transform(source_type): + assert transform.pyarrow_transform(source_type)(arrow_table_date_timestamps[source_col]).to_pylist() == [ + transform.transform(source_type)(_to_partition_representation(source_type, v)) + for v in arrow_table_date_timestamps[source_col].to_pylist() + ] + else: + with pytest.raises(ValueError): + transform.pyarrow_transform(DateType())(arrow_table_date_timestamps[source_col]) diff --git a/tests/utils/test_config.py b/tests/utils/test_config.py index 2f15bb56d8..066e7d7cc0 100644 --- a/tests/utils/test_config.py +++ b/tests/utils/test_config.py @@ -50,8 +50,8 @@ def test_from_environment_variables_uppercase() -> None: ) def test_fix_nested_objects_from_environment_variables() -> None: assert Config().get_catalog_config("PRODUCTION") == { - 's3.region': 'eu-north-1', - 's3.access-key-id': 'username', + "s3.region": "eu-north-1", + "s3.access-key-id": "username", } diff --git a/tests/utils/test_decimal.py b/tests/utils/test_decimal.py index 419cf05916..3e67bf691a 100644 --- a/tests/utils/test_decimal.py +++ b/tests/utils/test_decimal.py @@ -45,5 +45,5 @@ def test_decimal_required_bytes() -> None: def test_decimal_to_bytes() -> None: # Check the boundary between 2 and 3 bytes. # 2 bytes has a minimum of -32,768 and a maximum value of 32,767 (inclusive). - assert decimal_to_bytes(Decimal('32767.')) == b'\x7f\xff' - assert decimal_to_bytes(Decimal('32768.')) == b'\x00\x80\x00' + assert decimal_to_bytes(Decimal("32767.")) == b"\x7f\xff" + assert decimal_to_bytes(Decimal("32768.")) == b"\x00\x80\x00"