From 89cd401f810699e7affd2400e06136deaa8b84c9 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 15 Oct 2024 18:35:25 +0200 Subject: [PATCH 1/9] bring in path normalization function from v2, and add a failing test --- src/zarr/storage/_utils.py | 44 ++++++++++++++++++++++++++++++++++++++ tests/v3/test_api.py | 27 ++++++++++++++++++++++- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index cbc9c42bbd..f68a79bbe9 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -6,6 +6,50 @@ from zarr.core.buffer import Buffer +def normalize_path(path: str | bytes | object) -> str: + # handle bytes + if isinstance(path, bytes): + result = str(path, "ascii") + + # ensure str + elif not isinstance(path, str): + result = str(path) + + else: + result = path + + # convert backslash to forward slash + result = result.replace("\\", "/") + + # ensure no leading slash + while len(result) > 0 and result[0] == "/": + result = result[1:] + + # ensure no trailing slash + while len(result) > 0 and result[-1] == "/": + result = result[:-1] + + # collapse any repeated slashes + previous_char = None + collapsed = "" + for char in result: + if char == "/" and previous_char == "/": + pass + else: + collapsed += char + previous_char = char + result = collapsed + + # don't allow path segments with just '.' or '..' + segments = result.split("/") + if any(s in {".", ".."} for s in segments): + raise ValueError( + f"The path {path} is invalid because its string representation contains '.' or '..' segments." + ) + + return result + + def _normalize_interval_index( data: Buffer, interval: None | tuple[int | None, int | None] ) -> tuple[int, int]: diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index 0614185f68..03005cc15d 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -1,5 +1,6 @@ import pathlib import warnings +from typing import Literal import numpy as np import pytest @@ -10,9 +11,19 @@ import zarr.core.group from zarr import Array, Group from zarr.abc.store import Store -from zarr.api.synchronous import create, group, load, open, open_group, save, save_array, save_group +from zarr.api.synchronous import ( + create, + group, + load, + open, + open_group, + save, + save_array, + save_group, +) from zarr.core.common import ZarrFormat from zarr.errors import MetadataValidationError +from zarr.storage._utils import normalize_path from zarr.storage.memory import MemoryStore @@ -37,6 +48,20 @@ def test_create_array(memory_store: Store) -> None: assert z.chunks == (40,) +@pytest.mark.parametrize("path", ["/", "///foo/bar"]) +@pytest.mark.parametrize("node_type", ["array", "group"]) +def test_open_normalized_path( + memory_store: MemoryStore, path: str, node_type: Literal["array", "group"] +) -> None: + node: Group | Array + if node_type == "group": + node = group(store=memory_store, path=path) + elif node_type == "array": + node = create(store=memory_store, path=path, shape=(2,)) + + assert node.path == normalize_path(path) + + async def test_open_array(memory_store: MemoryStore) -> None: store = memory_store From 85af0bd733ffa28a017d738ca58e7a3f0b3baba2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 15 Oct 2024 20:50:14 +0200 Subject: [PATCH 2/9] rephrase comment --- src/zarr/storage/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index f68a79bbe9..0ea65f4712 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -40,7 +40,7 @@ def normalize_path(path: str | bytes | object) -> str: previous_char = char result = collapsed - # don't allow path segments with just '.' or '..' + # disallow path segments with just '.' or '..' segments = result.split("/") if any(s in {".", ".."} for s in segments): raise ValueError( From e331a3bf6253a44066803dce104e9219dae9ecee Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 15 Oct 2024 22:16:07 +0200 Subject: [PATCH 3/9] simplify storepath creation --- src/zarr/api/asynchronous.py | 33 +++++++++---------------------- src/zarr/storage/_utils.py | 19 ++++++++++++------ src/zarr/storage/common.py | 34 +++++++++++++++++++++----------- tests/v3/test_group.py | 1 + tests/v3/test_store/test_core.py | 28 ++++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 41 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 559049ae4f..e500562c4c 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -170,10 +170,7 @@ async def consolidate_metadata( The group, with the ``consolidated_metadata`` field set to include the metadata of each child node. """ - store_path = await make_store_path(store) - - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path) group = await AsyncGroup.open(store_path, zarr_format=zarr_format, use_consolidated=False) group.store_path.store._check_writable() @@ -291,10 +288,7 @@ async def open( """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, mode=mode, path=path, storage_options=storage_options) if "shape" not in kwargs and mode in {"a", "w", "w-"}: try: @@ -401,9 +395,7 @@ async def save_array( ) mode = kwargs.pop("mode", None) - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) new = await AsyncArray.create( store_path, zarr_format=zarr_format, @@ -582,9 +574,7 @@ async def group( mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a") - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) if chunk_store is not None: warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) @@ -697,9 +687,7 @@ async def open_group( if chunk_store is not None: warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, mode=mode, storage_options=storage_options, path=path) if attributes is None: attributes = {} @@ -883,9 +871,7 @@ async def create( if not isinstance(store, Store | StorePath): mode = "a" - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) return await AsyncArray.create( store_path, @@ -925,6 +911,7 @@ async def empty( retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ + return await create(shape=shape, fill_value=None, **kwargs) @@ -1044,7 +1031,7 @@ async def open_array( store: StoreLike | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, - path: PathLike | None = None, + path: PathLike = "", storage_options: dict[str, Any] | None = None, **kwargs: Any, # TODO: type kwargs as valid args to save ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: @@ -1071,9 +1058,7 @@ async def open_array( """ mode = kwargs.pop("mode", None) - store_path = await make_store_path(store, mode=mode) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path, mode=mode) zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index 0ea65f4712..6f7f958451 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -1,23 +1,30 @@ from __future__ import annotations +from pathlib import Path from typing import TYPE_CHECKING +from upath import UPath + if TYPE_CHECKING: from zarr.core.buffer import Buffer -def normalize_path(path: str | bytes | object) -> str: +def normalize_path(path: str | bytes | Path | UPath | None) -> str: # handle bytes - if isinstance(path, bytes): + if path is None: + result = "" + elif isinstance(path, bytes): result = str(path, "ascii") - # ensure str - elif not isinstance(path, str): + elif isinstance(path, Path | UPath): result = str(path) - else: + elif isinstance(path, str): result = path + else: + raise TypeError(f'Object {path} has an invalid type for "path": {type(path).__name__}') + # convert backslash to forward slash result = result.replace("\\", "/") @@ -44,7 +51,7 @@ def normalize_path(path: str | bytes | object) -> str: segments = result.split("/") if any(s in {".", ".."} for s in segments): raise ValueError( - f"The path {path} is invalid because its string representation contains '.' or '..' segments." + f"The path {path!r} is invalid because its string representation contains '.' or '..' segments." ) return result diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index 101e8f38af..e5b820c65b 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -8,6 +8,7 @@ from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError +from zarr.storage._utils import normalize_path from zarr.storage.local import LocalStore from zarr.storage.memory import MemoryStore @@ -41,9 +42,9 @@ class StorePath: store: Store path: str - def __init__(self, store: Store, path: str | None = None) -> None: + def __init__(self, store: Store, path: str = "") -> None: self.store = store - self.path = path or "" + self.path = path async def get( self, @@ -159,6 +160,7 @@ def __eq__(self, other: object) -> bool: async def make_store_path( store_like: StoreLike | None, *, + path: str | None = "", mode: AccessModeLiteral | None = None, storage_options: dict[str, Any] | None = None, ) -> StorePath: @@ -189,6 +191,9 @@ async def make_store_path( ---------- store_like : StoreLike | None The object to convert to a `StorePath` object. + path: str | None, optional + The path to use when creating the `StorePath` object. If None, the + default path is the empty string. mode : AccessModeLiteral | None, optional The mode to use when creating the `StorePath` object. If None, the default mode is 'r'. @@ -209,37 +214,44 @@ async def make_store_path( from zarr.storage.remote import RemoteStore # circular import used_storage_options = False - + path_normalized = normalize_path(path) if isinstance(store_like, StorePath): if mode is not None and mode != store_like.store.mode.str: _store = store_like.store.with_mode(mode) await _store._ensure_open() - store_like = StorePath(_store) - result = store_like + store_like = StorePath(_store, path=store_like.path) / path_normalized + result = store_like / path_normalized elif isinstance(store_like, Store): if mode is not None and mode != store_like.mode.str: store_like = store_like.with_mode(mode) await store_like._ensure_open() - result = StorePath(store_like) + result = StorePath(store_like, path=path_normalized) elif store_like is None: # mode = "w" is an exception to the default mode = 'r' - result = StorePath(await MemoryStore.open(mode=mode or "w")) + result = StorePath(await MemoryStore.open(mode=mode or "w"), path=path_normalized) elif isinstance(store_like, Path): - result = StorePath(await LocalStore.open(root=store_like, mode=mode or "r")) + result = StorePath( + await LocalStore.open(root=store_like, mode=mode or "r"), path=path_normalized + ) elif isinstance(store_like, str): storage_options = storage_options or {} if _is_fsspec_uri(store_like): used_storage_options = True result = StorePath( - RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r") + RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r"), + path=path_normalized, ) else: - result = StorePath(await LocalStore.open(root=Path(store_like), mode=mode or "r")) + result = StorePath( + await LocalStore.open(root=Path(store_like), mode=mode or "r"), path=path_normalized + ) elif isinstance(store_like, dict): # We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings. # By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate. - result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode or "r")) + result = StorePath( + await MemoryStore.open(store_dict=store_like, mode=mode or "r"), path=path_normalized + ) else: msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable] raise TypeError(msg) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 20960f0346..261a3116f2 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -587,6 +587,7 @@ def test_group_array_creation( assert empty_array.fill_value == 0 assert empty_array.shape == shape assert empty_array.store_path.store == store + assert empty_array.store_path.path == "empty" empty_like_array = group.empty_like(name="empty_like", data=empty_array) assert isinstance(empty_like_array, Array) diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index b2a8292ea9..1fe8fd69f5 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -2,7 +2,9 @@ from pathlib import Path import pytest +from upath import UPath +from zarr.storage._utils import normalize_path from zarr.storage.common import StoreLike, StorePath, make_store_path from zarr.storage.local import LocalStore from zarr.storage.memory import MemoryStore @@ -65,3 +67,29 @@ async def test_make_store_path_storage_options_raises(store_like: StoreLike) -> async def test_unsupported() -> None: with pytest.raises(TypeError, match="Unsupported type for store_like: 'int'"): await make_store_path(1) # type: ignore[arg-type] + + +@pytest.mark.parametrize( + "path", + [ + "/foo/bar", + "//foo/bar", + "foo///bar", + "foo/bar///", + Path("foo/bar"), + b"foo/bar", + UPath("foo/bar"), + ], +) +def test_normalize_path_valid(path: str | bytes | Path | UPath) -> None: + assert normalize_path(path) == "foo/bar" + + +def test_normalize_path_none(): + assert normalize_path(None) == "" + + +@pytest.mark.parametrize("path", [".", ".."]) +def test_normalize_path_invalid(path: str): + with pytest.raises(ValueError): + normalize_path(path) From e6ecf99e67ef8e046214eeec297c014e367e59e0 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 16 Oct 2024 09:36:18 +0200 Subject: [PATCH 4/9] Update tests/v3/test_api.py Co-authored-by: Joe Hamman --- tests/v3/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index 03005cc15d..4952254f65 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -48,7 +48,7 @@ def test_create_array(memory_store: Store) -> None: assert z.chunks == (40,) -@pytest.mark.parametrize("path", ["/", "///foo/bar"]) +@pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"]) @pytest.mark.parametrize("node_type", ["array", "group"]) def test_open_normalized_path( memory_store: MemoryStore, path: str, node_type: Literal["array", "group"] From 961bd77f8a0e96d956d1c3e3dac66217285ef494 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 16 Oct 2024 12:58:35 +0200 Subject: [PATCH 5/9] refactor: remove redundant zarr format fixture --- tests/v3/conftest.py | 2 +- tests/v3/test_group.py | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index ad3552ad65..8c66406c9b 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -138,7 +138,7 @@ def array_fixture(request: pytest.FixtureRequest) -> npt.NDArray[Any]: ) -@pytest.fixture(params=(2, 3)) +@pytest.fixture(params=(2, 3), ids=["zarr2", "zarr3"]) def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: if request.param == 2: return 2 diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 261a3116f2..b58efde0e1 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -43,14 +43,6 @@ def exists_ok(request: pytest.FixtureRequest) -> bool: return result -@pytest.fixture(params=[2, 3], ids=["zarr2", "zarr3"]) -def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: - result = request.param - if result not in (2, 3): - raise ValueError("Wrong value returned from test fixture.") - return cast(ZarrFormat, result) - - def test_group_init(store: Store, zarr_format: ZarrFormat) -> None: """ Test that initializing a group from an asyncgroup works. From b25f7e8d5ce2b9a03c490509f68ee698e1dafb1b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 16 Oct 2024 13:00:07 +0200 Subject: [PATCH 6/9] replace assertion with an informative error message --- src/zarr/storage/local.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index b80b04e1d0..5c03009a97 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -97,7 +97,10 @@ def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r") -> None: super().__init__(mode=mode) if isinstance(root, str): root = Path(root) - assert isinstance(root, Path) + if not isinstance(root, Path): + raise TypeError( + f'"root" must be a string or Path instance. Got an object with type {type(root)} instead.' + ) self.root = root async def _open(self) -> None: From aef42a19a9c3a921342d96fa0064f81d55c2c42e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 16 Oct 2024 13:09:59 +0200 Subject: [PATCH 7/9] fix incorrect path concatenation in make_store_path, and refactor store_path tests --- src/zarr/storage/common.py | 2 +- tests/v3/test_group.py | 5 ++- tests/v3/test_store/test_core.py | 64 +++++++++++++++++++++++--------- 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index e5b820c65b..b640a7729b 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -219,7 +219,7 @@ async def make_store_path( if mode is not None and mode != store_like.store.mode.str: _store = store_like.store.with_mode(mode) await _store._ensure_open() - store_like = StorePath(_store, path=store_like.path) / path_normalized + store_like = StorePath(_store, path=store_like.path) result = store_like / path_normalized elif isinstance(store_like, Store): if mode is not None and mode != store_like.mode.str: diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index b58efde0e1..4f062d5316 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -3,7 +3,7 @@ import contextlib import pickle import warnings -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal import numpy as np import pytest @@ -14,7 +14,6 @@ from zarr import Array, AsyncArray, AsyncGroup, Group from zarr.abc.store import Store from zarr.core.buffer import default_buffer_prototype -from zarr.core.common import JSON, ZarrFormat from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError @@ -26,6 +25,8 @@ if TYPE_CHECKING: from _pytest.compat import LEGACY_PATH + from zarr.core.common import JSON, ZarrFormat + @pytest.fixture(params=["local", "memory", "zip"]) async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index 1fe8fd69f5..771dc3c43e 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -1,7 +1,9 @@ import tempfile from pathlib import Path +from typing import Literal import pytest +from _pytest.compat import LEGACY_PATH from upath import UPath from zarr.storage._utils import normalize_path @@ -11,31 +13,59 @@ from zarr.storage.remote import RemoteStore -async def test_make_store_path(tmpdir: str) -> None: - # None - store_path = await make_store_path(None) +@pytest.mark.parametrize("path", [None, "", "bar"]) +async def test_make_store_path_none(path: str) -> None: + """ + Test that creating a store_path with None creates a memorystore + """ + store_path = await make_store_path(None, path=path) assert isinstance(store_path.store, MemoryStore) - - # str - store_path = await make_store_path(str(tmpdir)) + assert store_path.path == normalize_path(path) + + +@pytest.mark.parametrize("path", [None, "", "bar"]) +@pytest.mark.parametrize("store_type", [str, Path, LocalStore]) +@pytest.mark.parametrize("mode", ["r", "w", "a"]) +async def test_make_store_path_local( + tmpdir: LEGACY_PATH, + store_type: type[str] | type[Path] | type[LocalStore], + path: str, + mode: Literal["r", "w", "a"], +) -> None: + """ + Test the various ways of invoking make_store_path that create a LocalStore + """ + store_like = store_type(str(tmpdir)) + store_path = await make_store_path(store_like, path=path, mode=mode) assert isinstance(store_path.store, LocalStore) assert Path(store_path.store.root) == Path(tmpdir) - - # Path - store_path = await make_store_path(Path(tmpdir)) + assert store_path.path == normalize_path(path) + assert store_path.store.mode.str == mode + + +@pytest.mark.parametrize("path", [None, "", "bar"]) +@pytest.mark.parametrize("mode", ["r", "w", "a"]) +async def test_make_store_path_store_path( + tmpdir: LEGACY_PATH, path: str, mode: Literal["r", "w", "a"] +) -> None: + """ + Test invoking make_store_path when the input is another store_path. In particular we want to ensure + that a new path is handled correctly. + """ + store_like = StorePath(LocalStore(str(tmpdir)), path="root") + store_path = await make_store_path(store_like, path=path, mode=mode) assert isinstance(store_path.store, LocalStore) assert Path(store_path.store.root) == Path(tmpdir) + path_normalized = normalize_path(path) + assert store_path.path == (store_like / path_normalized).path - # Store - store_path = await make_store_path(store_path.store) - assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) + assert store_path.store.mode.str == mode - # StorePath - store_path = await make_store_path(store_path) - assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) +async def test_make_store_path_invalid() -> None: + """ + Test that invalid types raise TypeError + """ with pytest.raises(TypeError): await make_store_path(1) # type: ignore[arg-type] From 54642e8c48814237dfec6ddb9481012b31267043 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 16 Oct 2024 13:24:10 +0200 Subject: [PATCH 8/9] remove upath import because we don't need it --- src/zarr/storage/_utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index 6f7f958451..2d83f7287b 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -3,20 +3,19 @@ from pathlib import Path from typing import TYPE_CHECKING -from upath import UPath - if TYPE_CHECKING: from zarr.core.buffer import Buffer -def normalize_path(path: str | bytes | Path | UPath | None) -> str: +def normalize_path(path: str | bytes | Path | None) -> str: # handle bytes if path is None: result = "" elif isinstance(path, bytes): result = str(path, "ascii") # ensure str - elif isinstance(path, Path | UPath): + # handle pathlib.Path and upath.Path + elif isinstance(path, Path): result = str(path) elif isinstance(path, str): From f4d2aa1b2d61e6771311955ee11272aea83b53b0 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 16 Oct 2024 17:15:43 +0200 Subject: [PATCH 9/9] apply suggestions from code review --- src/zarr/storage/_utils.py | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index 2d83f7287b..ae39468897 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from pathlib import Path from typing import TYPE_CHECKING @@ -8,13 +9,12 @@ def normalize_path(path: str | bytes | Path | None) -> str: - # handle bytes if path is None: result = "" elif isinstance(path, bytes): result = str(path, "ascii") - # ensure str - # handle pathlib.Path and upath.Path + + # handle pathlib.Path elif isinstance(path, Path): result = str(path) @@ -27,24 +27,12 @@ def normalize_path(path: str | bytes | Path | None) -> str: # convert backslash to forward slash result = result.replace("\\", "/") - # ensure no leading slash - while len(result) > 0 and result[0] == "/": - result = result[1:] - - # ensure no trailing slash - while len(result) > 0 and result[-1] == "/": - result = result[:-1] + # remove leading and trailing slashes + result = result.strip("/") # collapse any repeated slashes - previous_char = None - collapsed = "" - for char in result: - if char == "/" and previous_char == "/": - pass - else: - collapsed += char - previous_char = char - result = collapsed + pat = re.compile(r"//+") + result = pat.sub("/", result) # disallow path segments with just '.' or '..' segments = result.split("/")