diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 559049ae4f..e500562c4c 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -170,10 +170,7 @@ async def consolidate_metadata( The group, with the ``consolidated_metadata`` field set to include the metadata of each child node. """ - store_path = await make_store_path(store) - - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path) group = await AsyncGroup.open(store_path, zarr_format=zarr_format, use_consolidated=False) group.store_path.store._check_writable() @@ -291,10 +288,7 @@ async def open( """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, mode=mode, path=path, storage_options=storage_options) if "shape" not in kwargs and mode in {"a", "w", "w-"}: try: @@ -401,9 +395,7 @@ async def save_array( ) mode = kwargs.pop("mode", None) - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) new = await AsyncArray.create( store_path, zarr_format=zarr_format, @@ -582,9 +574,7 @@ async def group( mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a") - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) if chunk_store is not None: warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) @@ -697,9 +687,7 @@ async def open_group( if chunk_store is not None: warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2) - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, mode=mode, storage_options=storage_options, path=path) if attributes is None: attributes = {} @@ -883,9 +871,7 @@ async def create( if not isinstance(store, Store | StorePath): mode = "a" - store_path = await make_store_path(store, mode=mode, storage_options=storage_options) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options) return await AsyncArray.create( store_path, @@ -925,6 +911,7 @@ async def empty( retrieve data from an empty Zarr array, any values may be returned, and these are not guaranteed to be stable from one access to the next. """ + return await create(shape=shape, fill_value=None, **kwargs) @@ -1044,7 +1031,7 @@ async def open_array( store: StoreLike | None = None, zarr_version: ZarrFormat | None = None, # deprecated zarr_format: ZarrFormat | None = None, - path: PathLike | None = None, + path: PathLike = "", storage_options: dict[str, Any] | None = None, **kwargs: Any, # TODO: type kwargs as valid args to save ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]: @@ -1071,9 +1058,7 @@ async def open_array( """ mode = kwargs.pop("mode", None) - store_path = await make_store_path(store, mode=mode) - if path is not None: - store_path = store_path / path + store_path = await make_store_path(store, path=path, mode=mode) zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) diff --git a/src/zarr/storage/_utils.py b/src/zarr/storage/_utils.py index cbc9c42bbd..ae39468897 100644 --- a/src/zarr/storage/_utils.py +++ b/src/zarr/storage/_utils.py @@ -1,11 +1,49 @@ from __future__ import annotations +import re +from pathlib import Path from typing import TYPE_CHECKING if TYPE_CHECKING: from zarr.core.buffer import Buffer +def normalize_path(path: str | bytes | Path | None) -> str: + if path is None: + result = "" + elif isinstance(path, bytes): + result = str(path, "ascii") + + # handle pathlib.Path + elif isinstance(path, Path): + result = str(path) + + elif isinstance(path, str): + result = path + + else: + raise TypeError(f'Object {path} has an invalid type for "path": {type(path).__name__}') + + # convert backslash to forward slash + result = result.replace("\\", "/") + + # remove leading and trailing slashes + result = result.strip("/") + + # collapse any repeated slashes + pat = re.compile(r"//+") + result = pat.sub("/", result) + + # disallow path segments with just '.' or '..' + segments = result.split("/") + if any(s in {".", ".."} for s in segments): + raise ValueError( + f"The path {path!r} is invalid because its string representation contains '.' or '..' segments." + ) + + return result + + def _normalize_interval_index( data: Buffer, interval: None | tuple[int | None, int | None] ) -> tuple[int, int]: diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index 101e8f38af..b640a7729b 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -8,6 +8,7 @@ from zarr.core.buffer import Buffer, default_buffer_prototype from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError +from zarr.storage._utils import normalize_path from zarr.storage.local import LocalStore from zarr.storage.memory import MemoryStore @@ -41,9 +42,9 @@ class StorePath: store: Store path: str - def __init__(self, store: Store, path: str | None = None) -> None: + def __init__(self, store: Store, path: str = "") -> None: self.store = store - self.path = path or "" + self.path = path async def get( self, @@ -159,6 +160,7 @@ def __eq__(self, other: object) -> bool: async def make_store_path( store_like: StoreLike | None, *, + path: str | None = "", mode: AccessModeLiteral | None = None, storage_options: dict[str, Any] | None = None, ) -> StorePath: @@ -189,6 +191,9 @@ async def make_store_path( ---------- store_like : StoreLike | None The object to convert to a `StorePath` object. + path: str | None, optional + The path to use when creating the `StorePath` object. If None, the + default path is the empty string. mode : AccessModeLiteral | None, optional The mode to use when creating the `StorePath` object. If None, the default mode is 'r'. @@ -209,37 +214,44 @@ async def make_store_path( from zarr.storage.remote import RemoteStore # circular import used_storage_options = False - + path_normalized = normalize_path(path) if isinstance(store_like, StorePath): if mode is not None and mode != store_like.store.mode.str: _store = store_like.store.with_mode(mode) await _store._ensure_open() - store_like = StorePath(_store) - result = store_like + store_like = StorePath(_store, path=store_like.path) + result = store_like / path_normalized elif isinstance(store_like, Store): if mode is not None and mode != store_like.mode.str: store_like = store_like.with_mode(mode) await store_like._ensure_open() - result = StorePath(store_like) + result = StorePath(store_like, path=path_normalized) elif store_like is None: # mode = "w" is an exception to the default mode = 'r' - result = StorePath(await MemoryStore.open(mode=mode or "w")) + result = StorePath(await MemoryStore.open(mode=mode or "w"), path=path_normalized) elif isinstance(store_like, Path): - result = StorePath(await LocalStore.open(root=store_like, mode=mode or "r")) + result = StorePath( + await LocalStore.open(root=store_like, mode=mode or "r"), path=path_normalized + ) elif isinstance(store_like, str): storage_options = storage_options or {} if _is_fsspec_uri(store_like): used_storage_options = True result = StorePath( - RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r") + RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r"), + path=path_normalized, ) else: - result = StorePath(await LocalStore.open(root=Path(store_like), mode=mode or "r")) + result = StorePath( + await LocalStore.open(root=Path(store_like), mode=mode or "r"), path=path_normalized + ) elif isinstance(store_like, dict): # We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings. # By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate. - result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode or "r")) + result = StorePath( + await MemoryStore.open(store_dict=store_like, mode=mode or "r"), path=path_normalized + ) else: msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable] raise TypeError(msg) diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index b80b04e1d0..5c03009a97 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -97,7 +97,10 @@ def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r") -> None: super().__init__(mode=mode) if isinstance(root, str): root = Path(root) - assert isinstance(root, Path) + if not isinstance(root, Path): + raise TypeError( + f'"root" must be a string or Path instance. Got an object with type {type(root)} instead.' + ) self.root = root async def _open(self) -> None: diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index ad3552ad65..8c66406c9b 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -138,7 +138,7 @@ def array_fixture(request: pytest.FixtureRequest) -> npt.NDArray[Any]: ) -@pytest.fixture(params=(2, 3)) +@pytest.fixture(params=(2, 3), ids=["zarr2", "zarr3"]) def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: if request.param == 2: return 2 diff --git a/tests/v3/test_api.py b/tests/v3/test_api.py index 0614185f68..4952254f65 100644 --- a/tests/v3/test_api.py +++ b/tests/v3/test_api.py @@ -1,5 +1,6 @@ import pathlib import warnings +from typing import Literal import numpy as np import pytest @@ -10,9 +11,19 @@ import zarr.core.group from zarr import Array, Group from zarr.abc.store import Store -from zarr.api.synchronous import create, group, load, open, open_group, save, save_array, save_group +from zarr.api.synchronous import ( + create, + group, + load, + open, + open_group, + save, + save_array, + save_group, +) from zarr.core.common import ZarrFormat from zarr.errors import MetadataValidationError +from zarr.storage._utils import normalize_path from zarr.storage.memory import MemoryStore @@ -37,6 +48,20 @@ def test_create_array(memory_store: Store) -> None: assert z.chunks == (40,) +@pytest.mark.parametrize("path", ["foo", "/", "/foo", "///foo/bar"]) +@pytest.mark.parametrize("node_type", ["array", "group"]) +def test_open_normalized_path( + memory_store: MemoryStore, path: str, node_type: Literal["array", "group"] +) -> None: + node: Group | Array + if node_type == "group": + node = group(store=memory_store, path=path) + elif node_type == "array": + node = create(store=memory_store, path=path, shape=(2,)) + + assert node.path == normalize_path(path) + + async def test_open_array(memory_store: MemoryStore) -> None: store = memory_store diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 20960f0346..4f062d5316 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -3,7 +3,7 @@ import contextlib import pickle import warnings -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal import numpy as np import pytest @@ -14,7 +14,6 @@ from zarr import Array, AsyncArray, AsyncGroup, Group from zarr.abc.store import Store from zarr.core.buffer import default_buffer_prototype -from zarr.core.common import JSON, ZarrFormat from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError @@ -26,6 +25,8 @@ if TYPE_CHECKING: from _pytest.compat import LEGACY_PATH + from zarr.core.common import JSON, ZarrFormat + @pytest.fixture(params=["local", "memory", "zip"]) async def store(request: pytest.FixtureRequest, tmpdir: LEGACY_PATH) -> Store: @@ -43,14 +44,6 @@ def exists_ok(request: pytest.FixtureRequest) -> bool: return result -@pytest.fixture(params=[2, 3], ids=["zarr2", "zarr3"]) -def zarr_format(request: pytest.FixtureRequest) -> ZarrFormat: - result = request.param - if result not in (2, 3): - raise ValueError("Wrong value returned from test fixture.") - return cast(ZarrFormat, result) - - def test_group_init(store: Store, zarr_format: ZarrFormat) -> None: """ Test that initializing a group from an asyncgroup works. @@ -587,6 +580,7 @@ def test_group_array_creation( assert empty_array.fill_value == 0 assert empty_array.shape == shape assert empty_array.store_path.store == store + assert empty_array.store_path.path == "empty" empty_like_array = group.empty_like(name="empty_like", data=empty_array) assert isinstance(empty_like_array, Array) diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index b2a8292ea9..771dc3c43e 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -1,39 +1,71 @@ import tempfile from pathlib import Path +from typing import Literal import pytest +from _pytest.compat import LEGACY_PATH +from upath import UPath +from zarr.storage._utils import normalize_path from zarr.storage.common import StoreLike, StorePath, make_store_path from zarr.storage.local import LocalStore from zarr.storage.memory import MemoryStore from zarr.storage.remote import RemoteStore -async def test_make_store_path(tmpdir: str) -> None: - # None - store_path = await make_store_path(None) +@pytest.mark.parametrize("path", [None, "", "bar"]) +async def test_make_store_path_none(path: str) -> None: + """ + Test that creating a store_path with None creates a memorystore + """ + store_path = await make_store_path(None, path=path) assert isinstance(store_path.store, MemoryStore) - - # str - store_path = await make_store_path(str(tmpdir)) + assert store_path.path == normalize_path(path) + + +@pytest.mark.parametrize("path", [None, "", "bar"]) +@pytest.mark.parametrize("store_type", [str, Path, LocalStore]) +@pytest.mark.parametrize("mode", ["r", "w", "a"]) +async def test_make_store_path_local( + tmpdir: LEGACY_PATH, + store_type: type[str] | type[Path] | type[LocalStore], + path: str, + mode: Literal["r", "w", "a"], +) -> None: + """ + Test the various ways of invoking make_store_path that create a LocalStore + """ + store_like = store_type(str(tmpdir)) + store_path = await make_store_path(store_like, path=path, mode=mode) assert isinstance(store_path.store, LocalStore) assert Path(store_path.store.root) == Path(tmpdir) - - # Path - store_path = await make_store_path(Path(tmpdir)) + assert store_path.path == normalize_path(path) + assert store_path.store.mode.str == mode + + +@pytest.mark.parametrize("path", [None, "", "bar"]) +@pytest.mark.parametrize("mode", ["r", "w", "a"]) +async def test_make_store_path_store_path( + tmpdir: LEGACY_PATH, path: str, mode: Literal["r", "w", "a"] +) -> None: + """ + Test invoking make_store_path when the input is another store_path. In particular we want to ensure + that a new path is handled correctly. + """ + store_like = StorePath(LocalStore(str(tmpdir)), path="root") + store_path = await make_store_path(store_like, path=path, mode=mode) assert isinstance(store_path.store, LocalStore) assert Path(store_path.store.root) == Path(tmpdir) + path_normalized = normalize_path(path) + assert store_path.path == (store_like / path_normalized).path - # Store - store_path = await make_store_path(store_path.store) - assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) + assert store_path.store.mode.str == mode - # StorePath - store_path = await make_store_path(store_path) - assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) +async def test_make_store_path_invalid() -> None: + """ + Test that invalid types raise TypeError + """ with pytest.raises(TypeError): await make_store_path(1) # type: ignore[arg-type] @@ -65,3 +97,29 @@ async def test_make_store_path_storage_options_raises(store_like: StoreLike) -> async def test_unsupported() -> None: with pytest.raises(TypeError, match="Unsupported type for store_like: 'int'"): await make_store_path(1) # type: ignore[arg-type] + + +@pytest.mark.parametrize( + "path", + [ + "/foo/bar", + "//foo/bar", + "foo///bar", + "foo/bar///", + Path("foo/bar"), + b"foo/bar", + UPath("foo/bar"), + ], +) +def test_normalize_path_valid(path: str | bytes | Path | UPath) -> None: + assert normalize_path(path) == "foo/bar" + + +def test_normalize_path_none(): + assert normalize_path(None) == "" + + +@pytest.mark.parametrize("path", [".", ".."]) +def test_normalize_path_invalid(path: str): + with pytest.raises(ValueError): + normalize_path(path)