Skip to content

Commit be465da

Browse files
dcherian and paraseba committed
Stateful tests for array/group manipulation
Port over some stateful tests from [Arraylake](https://docs.earthmover.io/). Co-authored-by: Sebastián Galkin <[email protected]>
1 parent a31046c commit be465da

File tree

1 file changed

+220
-0
lines changed

1 file changed

+220
-0
lines changed
+220
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
import hypothesis.extra.numpy as npst
2+
import hypothesis.strategies as st
3+
import pytest
4+
from hypothesis import assume, note
5+
from hypothesis.stateful import (
6+
RuleBasedStateMachine,
7+
Settings,
8+
initialize,
9+
invariant,
10+
precondition,
11+
rule,
12+
run_state_machine_as_test,
13+
)
14+
15+
import zarr
16+
from zarr import Array
17+
from zarr.abc.store import Store
18+
from zarr.core.sync import SyncMixin
19+
from zarr.storage import MemoryStore, ZipStore
20+
from zarr.testing.strategies import node_names, np_array_and_chunks, numpy_arrays
21+
22+
23+
def split_prefix_name(path):
    """Split ``path`` at its last ``/`` into ``(prefix, name)``.

    ``prefix`` is everything before the final separator and ``name`` is
    everything after it. A path without any ``/`` yields an empty prefix
    and the whole path as the name.
    """
    # rpartition returns ("", "", path) when the separator is absent, which
    # is exactly the "no prefix" case.
    head, _sep, tail = path.rpartition("/")
    return head, tail
31+
32+
33+
class ZarrHierarchyStateMachine(SyncMixin, RuleBasedStateMachine):
    """
    This state machine models operations that modify a zarr store's
    hierarchy. That is, user actions that modify arrays/groups as well
    as list operations. It is intended to be used by external stores, and
    compares their results to a MemoryStore that is assumed to be perfect.

    Every rule applies the same operation to ``self.store`` (the store under
    test) and ``self.model`` (the reference MemoryStore), and records the
    affected paths in ``self.all_groups`` / ``self.all_arrays``. The
    invariant then checks that both stores list the same keys.
    """

    def __init__(self, store) -> None:
        """Set up the reference model and empty bookkeeping for ``store``."""
        super().__init__()

        self.store = store

        self.model = MemoryStore(mode="w")
        zarr.group(store=self.model)

        # Track state of the hierarchy, these should contain fully qualified paths.
        # The root group is created above but is not recorded in `all_groups`.
        self.all_groups: set[str] = set()
        self.all_arrays: set[str] = set()

    @initialize()
    def init_store(self) -> None:
        """Clear the store under test and create a root group in it."""
        # This lets us reuse the fixture provided store.
        self._sync(self.store.clear())
        zarr.group(store=self.store)

    def can_add(self, path: str) -> bool:
        """Return True when ``path`` is not already tracked as a group or an array."""
        return path not in self.all_groups and path not in self.all_arrays

    # -------------------- store operations -----------------------
    @rule(name=node_names, data=st.data())
    def add_group(self, name, data) -> None:
        """Create group ``name`` under a drawn tracked group (or the root) in both stores."""
        if self.all_groups:
            # sorted() gives Hypothesis a deterministic sequence to sample from.
            parent = data.draw(st.sampled_from(sorted(self.all_groups)), label="Group parent")
        else:
            parent = ""
        # An empty parent yields "/name"; strip the leading slash for root-level nodes.
        path = f"{parent}/{name}".lstrip("/")
        assume(self.can_add(path))
        note(f"Adding group: path='{path}'")
        self.all_groups.add(path)
        zarr.group(store=self.store, path=path)
        zarr.group(store=self.model, path=path)

    @rule(
        data=st.data(),
        name=node_names,
        array_and_chunks=np_array_and_chunks(arrays=numpy_arrays(zarr_formats=st.just(3))),
    )
    def add_array(self, data, name, array_and_chunks) -> None:
        """Create array ``name`` under a drawn tracked group (or the root) in both stores."""
        array, chunks = array_and_chunks
        fill_value = data.draw(npst.from_dtype(array.dtype))
        if self.all_groups:
            parent = data.draw(st.sampled_from(sorted(self.all_groups)), label="Array parent")
        else:
            parent = ""
        path = f"{parent}/{name}".lstrip("/")
        assume(self.can_add(path))
        note(f"Adding array: path='{path}' shape={array.shape} chunks={chunks}")
        for store in [self.store, self.model]:
            zarr.array(array, chunks=chunks, path=path, store=store, fill_value=fill_value)
        self.all_arrays.add(path)

    # NOTE: the commented-out rules below are not yet ported; they still refer
    # to names (`self.repo`, `self.model.rename`, `os`) that this class does not define.

    # @precondition(lambda self: bool(self.all_groups))
    # @precondition(lambda self: bool(self.all_arrays))
    # @rule(data=st.data())
    # def move_array(self, data):
    #     array_path = data.draw(st.sampled_from(self.all_arrays), label="Array move source")
    #     to_group = data.draw(st.sampled_from(self.all_groups), label="Array move destination")

    #     # fixme renaming to self?
    #     array_name = os.path.basename(array_path)
    #     assume(self.model.can_add(to_group, array_name))
    #     new_path = f"{to_group}/{array_name}".lstrip("/")
    #     note(f"moving array '{array_path}' -> '{new_path}'")
    #     self.model.rename(array_path, new_path)
    #     self.repo.store.rename(array_path, new_path)

    # @precondition(lambda self: len(self.all_groups) >= 2)
    # @rule(data=st.data())
    # def move_group(self, data):
    #     from_group = data.draw(st.sampled_from(self.all_groups), label="Group move source")
    #     to_group = data.draw(st.sampled_from(self.all_groups), label="Group move destination")
    #     assume(not to_group.startswith(from_group))

    #     from_group_name = os.path.basename(from_group)
    #     assume(self.model.can_add(to_group, from_group_name))
    #     # fixme renaming to self?
    #     new_path = f"{to_group}/{from_group_name}".lstrip("/")
    #     note(f"moving group '{from_group}' -> '{new_path}'")
    #     self.model.rename(from_group, new_path)
    #     self.repo.store.rename(from_group, new_path)

    @precondition(lambda self: bool(self.all_arrays))
    @rule(data=st.data())
    def delete_array_using_del(self, data) -> None:
        """Delete a drawn tracked array from both stores via ``del group[name]``."""
        array_path = data.draw(
            st.sampled_from(sorted(self.all_arrays)), label="Array deletion target"
        )
        prefix, array_name = split_prefix_name(array_path)
        note(f"Deleting array '{array_path}' ({prefix=!r}, {array_name=!r}) using del")
        for store in [self.model, self.store]:
            group = zarr.open_group(path=prefix, store=store)
            group[array_name]  # check that it exists
            del group[array_name]
        self.all_arrays.remove(array_path)

    @precondition(lambda self: len(self.all_groups) >= 2)  # fixme don't delete root
    @rule(data=st.data())
    def delete_group_using_del(self, data) -> None:
        """Delete a drawn tracked group, and everything below it, from both stores.

        The bookkeeping sets are pruned of every descendant of the deleted
        group before the group itself is removed from the stores.
        """
        group_path = data.draw(
            st.sampled_from(sorted(self.all_groups)), label="Group deletion target"
        )
        prefix, group_name = split_prefix_name(group_path)
        note(f"Deleting group '{group_path}', {prefix=!r}, {group_name=!r} using delete")
        # Walk the *entire* subtree (max_depth=None). Per the zarr
        # `Group.members` documentation the default depth only yields
        # immediate children, which would leave deeper descendants behind in
        # the tracked sets and let later rules target nodes that no longer exist.
        descendants = zarr.open_group(store=self.model, path=group_path).members(max_depth=None)
        for _, obj in descendants:
            if isinstance(obj, Array):
                self.all_arrays.remove(obj.path)
            else:
                self.all_groups.remove(obj.path)
        for store in [self.store, self.model]:
            group = zarr.open_group(store=store, path=prefix)
            group[group_name]  # check that it exists
            del group[group_name]
        if group_path != "/":
            # The root group is always present
            self.all_groups.remove(group_path)

    # # --------------- assertions -----------------
    # def check_group_arrays(self, group):
    #     # note(f"Checking arrays of '{group}'")
    #     g1 = self.model.get_group(group)
    #     g2 = zarr.open_group(path=group, mode="r", store=self.repo.store)
    #     model_arrays = sorted(g1.arrays(), key=itemgetter(0))
    #     our_arrays = sorted(g2.arrays(), key=itemgetter(0))
    #     for (n1, a1), (n2, a2) in zip_longest(model_arrays, our_arrays):
    #         assert n1 == n2
    #         assert_array_equal(a1, a2)

    # def check_subgroups(self, group_path):
    #     g1 = self.model.get_group(group_path)
    #     g2 = zarr.open_group(path=group_path, mode="r", store=self.repo.store)
    #     g1_children = [name for (name, _) in g1.groups()]
    #     g2_children = [name for (name, _) in g2.groups()]
    #     # note(f"Checking {len(g1_children)} subgroups of group '{group_path}'")
    #     assert g1_children == g2_children

    # def check_list_prefix_from_group(self, group):
    #     prefix = f"meta/root/{group}"
    #     model_list = sorted(self.model.list_prefix(prefix))
    #     al_list = sorted(self.repo.store.list_prefix(prefix))
    #     # note(f"Checking {len(model_list)} keys under '{prefix}'")
    #     assert model_list == al_list

    #     prefix = f"data/root/{group}"
    #     model_list = sorted(self.model.list_prefix(prefix))
    #     al_list = sorted(self.repo.store.list_prefix(prefix))
    #     # note(f"Checking {len(model_list)} keys under '{prefix}'")
    #     assert model_list == al_list

    # @precondition(lambda self: self.model.is_persistent_session())
    # @rule(data=st.data())
    # def check_group_path(self, data):
    #     t0 = time.time()
    #     group = data.draw(st.sampled_from(self.all_groups))
    #     self.check_list_prefix_from_group(group)
    #     self.check_subgroups(group)
    #     self.check_group_arrays(group)
    #     t1 = time.time()
    #     note(f"Checks took {t1 - t0} sec.")

    @invariant()
    def check_list_prefix_from_root(self) -> None:
        """Assert that the model and the store under test list the same keys from the root."""
        model_list = self._sync_iter(self.model.list_prefix(""))
        store_list = self._sync_iter(self.store.list_prefix(""))
        note(f"Checking {len(model_list)} keys")
        # Listing order is not compared; only the set of keys (with multiplicity).
        assert sorted(model_list) == sorted(store_list)
209+
210+
211+
def test_zarr_hierarchy(sync_store: Store):
    """Run the hierarchy state machine against the ``sync_store`` fixture.

    ZipStore is skipped, and the state machine is only executed when the
    fixture provides a MemoryStore; for any other store type the test body
    does nothing.
    """
    if isinstance(sync_store, ZipStore):
        pytest.skip(reason="ZipStore does not support delete")

    if not isinstance(sync_store, MemoryStore):
        return

    def build_machine() -> ZarrHierarchyStateMachine:
        # Factory handed to Hypothesis; each call wraps the same fixture store.
        return ZarrHierarchyStateMachine(sync_store)

    machine_settings = Settings(report_multiple_bugs=False)
    run_state_machine_as_test(build_machine, settings=machine_settings)

0 commit comments

Comments
 (0)