Skip to content

Normalize paths when creating StorePath #2850

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions changes/2850.fix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fixed a bug where ``StorePath`` creation would not apply standard path normalization to the ``path`` parameter,
which led to the creation of arrays and groups with invalid keys.
2 changes: 1 addition & 1 deletion src/zarr/storage/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class StorePath:

def __init__(self, store: Store, path: str = "") -> None:
self.store = store
self.path = path
self.path = normalize_path(path)

@property
def read_only(self) -> bool:
Expand Down
4 changes: 3 additions & 1 deletion src/zarr/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from zarr.core.sync import sync
from zarr.storage import MemoryStore, StoreLike
from zarr.storage._common import _dereference_path
from zarr.storage._utils import normalize_path

# Copied from Xarray
_attr_keys = st.text(st.characters(), min_size=1)
Expand Down Expand Up @@ -277,11 +278,12 @@ def arrays(
if a.metadata.zarr_format == 3:
assert a.fill_value is not None
assert a.name is not None
assert a.path == normalize_path(array_path)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we delete line 287, then?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert a.name == "/" + a.path
assert isinstance(root[array_path], Array)
assert nparray.shape == a.shape
assert chunk_shape == a.chunks
assert shard_shape == a.shards
assert array_path == a.path, (path, name, array_path, a.name, a.path)
assert a.basename == name, (a.basename, name)
assert dict(a.attrs) == expected_attrs

Expand Down
65 changes: 38 additions & 27 deletions tests/test_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,24 +130,27 @@ async def test_create_creates_parents(store: Store, zarr_format: ZarrFormat) ->
assert g.attrs == {}


def test_group_name_properties(store: Store, zarr_format: ZarrFormat) -> None:
@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("root_name", ["", "/", "a", "/a"])
@pytest.mark.parametrize("branch_name", ["foo", "/foo", "foo/bar", "/foo/bar"])
def test_group_name_properties(
store: Store, zarr_format: ZarrFormat, root_name: str, branch_name: str
) -> None:
"""
Test basic properties of groups
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do think the old test here is useful (in addition to the updated test), because it shows explicitly what the values of all the properties are, making it much easier to see what's changed if they change (which they might have done with this PR). So I'd be in favour of just copy/pasting the old test back here alongside the new test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new test is the same as the old test, but parametrized instead of two copy-pasted cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a rule, we should avoid tests that follow the "copy + pasted code blocks with minor modifications" pattern. These are a huge source of friction when fixing bugs. That friction should be reduced by using parametrization where we can, as I did here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree that we shouldn't copy/paste with minor modifications, but I think one test that hard codes the results instead of using other properties is worthwhile to have.

Test that the path, name, and basename attributes of a group and its subgroups are consistent
"""
root = Group.from_store(store=store, zarr_format=zarr_format)
assert root.path == ""
assert root.name == "/"
assert root.basename == ""
root = Group.from_store(store=StorePath(store=store, path=root_name), zarr_format=zarr_format)
assert root.path == normalize_path(root_name)
assert root.name == "/" + root.path
assert root.basename == root.path

foo = root.create_group("foo")
assert foo.path == "foo"
assert foo.name == "/foo"
assert foo.basename == "foo"

bar = root.create_group("foo/bar")
assert bar.path == "foo/bar"
assert bar.name == "/foo/bar"
assert bar.basename == "bar"
branch = root.create_group(branch_name)
if root.path == "":
assert branch.path == normalize_path(branch_name)
else:
assert branch.path == "/".join([root.path, normalize_path(branch_name)])
assert branch.name == "/" + branch.path
assert branch.basename == branch_name.split("/")[-1]


@pytest.mark.parametrize("consolidated_metadata", [True, False])
Expand Down Expand Up @@ -623,11 +626,13 @@ async def test_group_update_attributes_async(store: Store, zarr_format: ZarrForm


@pytest.mark.parametrize("method", ["create_array", "array"])
@pytest.mark.parametrize("name", ["a", "/a"])
def test_group_create_array(
store: Store,
zarr_format: ZarrFormat,
overwrite: bool,
method: Literal["create_array", "array"],
name: str,
) -> None:
"""
Test `Group.from_store`
Expand All @@ -638,23 +643,26 @@ def test_group_create_array(
data = np.arange(np.prod(shape)).reshape(shape).astype(dtype)

if method == "create_array":
array = group.create_array(name="array", shape=shape, dtype=dtype)
array = group.create_array(name=name, shape=shape, dtype=dtype)
array[:] = data
elif method == "array":
with pytest.warns(DeprecationWarning):
array = group.array(name="array", data=data, shape=shape, dtype=dtype)
array = group.array(name=name, data=data, shape=shape, dtype=dtype)
else:
raise AssertionError

if not overwrite:
if method == "create_array":
with pytest.raises(ContainsArrayError):
a = group.create_array(name="array", shape=shape, dtype=dtype)
a = group.create_array(name=name, shape=shape, dtype=dtype)
a[:] = data
elif method == "array":
with pytest.raises(ContainsArrayError), pytest.warns(DeprecationWarning):
a = group.array(name="array", shape=shape, dtype=dtype)
a = group.array(name=name, shape=shape, dtype=dtype)
a[:] = data

assert array.path == normalize_path(name)
assert array.name == "/" + array.path
assert array.shape == shape
assert array.dtype == np.dtype(dtype)
assert np.array_equal(array[:], data)
Expand Down Expand Up @@ -945,20 +953,23 @@ async def test_asyncgroup_delitem(store: Store, zarr_format: ZarrFormat) -> None
raise AssertionError


@pytest.mark.parametrize("name", ["a", "/a"])
async def test_asyncgroup_create_group(
store: Store,
name: str,
zarr_format: ZarrFormat,
) -> None:
agroup = await AsyncGroup.from_store(store=store, zarr_format=zarr_format)
sub_node_path = "sub_group"
attributes = {"foo": 999}
subnode = await agroup.create_group(name=sub_node_path, attributes=attributes)

assert isinstance(subnode, AsyncGroup)
assert subnode.attrs == attributes
assert subnode.store_path.path == sub_node_path
assert subnode.store_path.store == store
assert subnode.metadata.zarr_format == zarr_format
subgroup = await agroup.create_group(name=name, attributes=attributes)

assert isinstance(subgroup, AsyncGroup)
assert subgroup.path == normalize_path(name)
assert subgroup.name == "/" + subgroup.path
assert subgroup.attrs == attributes
assert subgroup.store_path.path == subgroup.path
assert subgroup.store_path.store == store
assert subgroup.metadata.zarr_format == zarr_format


async def test_asyncgroup_create_array(
Expand Down