Skip to content

Commit c42433d

Browse files
committed
update Attributes, adding StoreV3 support
1 parent 4057088 commit c42433d

File tree

2 files changed

+139
-54
lines changed

2 files changed

+139
-54
lines changed

zarr/attrs.py

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections.abc import MutableMapping
22

3-
from zarr._storage.store import Store
3+
from zarr._storage.store import Store, StoreV3
44
from zarr.util import json_dumps
55

66

@@ -26,7 +26,15 @@ class Attributes(MutableMapping):
2626

2727
def __init__(self, store, key='.zattrs', read_only=False, cache=True,
2828
synchronizer=None):
29-
self.store = Store._ensure_store(store)
29+
30+
self._version = getattr(store, '_store_version', 2)
31+
assert key
32+
33+
if self._version == 3 and '.z' in key:
34+
raise ValueError('invalid v3 key')
35+
36+
_Store = Store if self._version == 2 else StoreV3
37+
self.store = _Store._ensure_store(store)
3038
self.key = key
3139
self.read_only = read_only
3240
self.cache = cache
@@ -38,6 +46,8 @@ def _get_nosync(self):
3846
data = self.store[self.key]
3947
except KeyError:
4048
d = dict()
49+
if self._version > 2:
50+
d['attributes'] = {}
4151
else:
4252
d = self.store._metadata_class.parse_metadata(data)
4353
return d
@@ -47,14 +57,19 @@ def asdict(self):
4757
if self.cache and self._cached_asdict is not None:
4858
return self._cached_asdict
4959
d = self._get_nosync()
60+
if self._version == 3:
61+
d = d['attributes']
5062
if self.cache:
5163
self._cached_asdict = d
5264
return d
5365

5466
def refresh(self):
5567
"""Refresh cached attributes from the store."""
5668
if self.cache:
57-
self._cached_asdict = self._get_nosync()
69+
if self._version == 3:
70+
self._cached_asdict = self._get_nosync()['attributes']
71+
else:
72+
self._cached_asdict = self._get_nosync()
5873

5974
def __contains__(self, x):
6075
return x in self.asdict()
@@ -84,7 +99,10 @@ def _setitem_nosync(self, item, value):
8499
d = self._get_nosync()
85100

86101
# set key value
87-
d[item] = value
102+
if self._version == 2:
103+
d[item] = value
104+
else:
105+
d['attributes'][item] = value
88106

89107
# _put modified data
90108
self._put_nosync(d)
@@ -98,20 +116,45 @@ def _delitem_nosync(self, key):
98116
d = self._get_nosync()
99117

100118
# delete key value
101-
del d[key]
119+
if self._version == 2:
120+
del d[key]
121+
else:
122+
del d['attributes'][key]
102123

103124
# _put modified data
104125
self._put_nosync(d)
105126

106127
def put(self, d):
107128
"""Overwrite all attributes with the key/value pairs in the provided dictionary
108129
`d` in a single operation."""
109-
self._write_op(self._put_nosync, d)
130+
if self._version == 2:
131+
self._write_op(self._put_nosync, d)
132+
else:
133+
self._write_op(self._put_nosync, dict(attributes=d))
110134

111135
def _put_nosync(self, d):
112-
self.store[self.key] = json_dumps(d)
113-
if self.cache:
114-
self._cached_asdict = d
136+
if self._version == 2:
137+
self.store[self.key] = json_dumps(d)
138+
if self.cache:
139+
self._cached_asdict = d
140+
else:
141+
if self.key in self.store:
142+
# The attributes cannot be written to the store as a standalone JSON
143+
# document; they have to be stored under the pre-existing "attributes"
144+
# key of the v3 metadata.
145+
146+
# Note: the extra store read below increases the store's __getitem__ count, which changes the store.counter values expected in test_caching_on!
147+
148+
meta = self.store._metadata_class.parse_metadata(self.store[self.key])
149+
if 'attributes' in meta and 'filters' in meta['attributes']:
150+
# need to preserve any existing "filters" attribute
151+
d['attributes']['filters'] = meta['attributes']['filters']
152+
meta['attributes'] = d['attributes']
153+
else:
154+
meta = d
155+
self.store[self.key] = json_dumps(meta)
156+
if self.cache:
157+
self._cached_asdict = d['attributes']
115158

116159
# noinspection PyMethodOverriding
117160
def update(self, *args, **kwargs):
@@ -124,7 +167,12 @@ def _update_nosync(self, *args, **kwargs):
124167
d = self._get_nosync()
125168

126169
# update
127-
d.update(*args, **kwargs)
170+
if self._version == 2:
171+
d.update(*args, **kwargs)
172+
else:
173+
if 'attributes' not in d:
174+
d['attributes'] = {}
175+
d['attributes'].update(*args, **kwargs)
128176

129177
# _put modified data
130178
self._put_nosync(d)

zarr/tests/test_attrs.py

Lines changed: 81 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,30 @@
33
import pytest
44

55
from zarr.attrs import Attributes
6-
from zarr.tests.util import CountingDict
7-
from zarr.storage import KVStore
6+
from zarr.storage import KVStore, KVStoreV3
7+
from zarr.tests.util import CountingDict, CountingDictV3
8+
9+
10+
@pytest.fixture(params=[2, 3])
11+
def zarr_version(request):
12+
return request.param
13+
14+
15+
def _init_store(version):
16+
"""Use a plain dict() for v2, but KVStoreV3 otherwise."""
17+
if version == 2:
18+
return dict()
19+
return KVStoreV3(dict())
820

921

1022
class TestAttributes():
1123

1224
def init_attributes(self, store, read_only=False, cache=True):
1325
return Attributes(store, key='attrs', read_only=read_only, cache=cache)
1426

15-
@pytest.mark.parametrize('store_from_dict', [False, True])
16-
def test_storage(self, store_from_dict):
27+
def test_storage(self, zarr_version):
1728

18-
if store_from_dict:
19-
store = dict()
20-
else:
21-
store = KVStore(dict())
29+
store = _init_store(zarr_version)
2230
a = Attributes(store=store, key='attrs')
2331
assert isinstance(a.store, KVStore)
2432
assert 'foo' not in a
@@ -30,11 +38,14 @@ def test_storage(self, store_from_dict):
3038
assert 'attrs' in store
3139
assert isinstance(store['attrs'], bytes)
3240
d = json.loads(str(store['attrs'], 'ascii'))
41+
if zarr_version == 3:
42+
d = d['attributes']
3343
assert dict(foo='bar', baz=42) == d
3444

35-
def test_get_set_del_contains(self):
45+
def test_get_set_del_contains(self, zarr_version):
3646

37-
a = self.init_attributes(dict())
47+
store = _init_store(zarr_version)
48+
a = self.init_attributes(store)
3849
assert 'foo' not in a
3950
a['foo'] = 'bar'
4051
a['baz'] = 42
@@ -48,9 +59,10 @@ def test_get_set_del_contains(self):
4859
# noinspection PyStatementEffect
4960
a['foo']
5061

51-
def test_update_put(self):
62+
def test_update_put(self, zarr_version):
5263

53-
a = self.init_attributes(dict())
64+
store = _init_store(zarr_version)
65+
a = self.init_attributes(store)
5466
assert 'foo' not in a
5567
assert 'bar' not in a
5668
assert 'baz' not in a
@@ -65,9 +77,10 @@ def test_update_put(self):
6577
assert a['bar'] == 84
6678
assert 'baz' not in a
6779

68-
def test_iterators(self):
80+
def test_iterators(self, zarr_version):
6981

70-
a = self.init_attributes(dict())
82+
store = _init_store(zarr_version)
83+
a = self.init_attributes(store)
7184
assert 0 == len(a)
7285
assert set() == set(a)
7386
assert set() == set(a.keys())
@@ -83,10 +96,13 @@ def test_iterators(self):
8396
assert {'bar', 42} == set(a.values())
8497
assert {('foo', 'bar'), ('baz', 42)} == set(a.items())
8598

86-
def test_read_only(self):
87-
store = dict()
99+
def test_read_only(self, zarr_version):
100+
store = _init_store(zarr_version)
88101
a = self.init_attributes(store, read_only=True)
89-
store['attrs'] = json.dumps(dict(foo='bar', baz=42)).encode('ascii')
102+
if zarr_version == 2:
103+
store['attrs'] = json.dumps(dict(foo='bar', baz=42)).encode('ascii')
104+
else:
105+
store['attrs'] = json.dumps(dict(attributes=dict(foo='bar', baz=42))).encode('ascii')
90106
assert a['foo'] == 'bar'
91107
assert a['baz'] == 42
92108
with pytest.raises(PermissionError):
@@ -96,8 +112,9 @@ def test_read_only(self):
96112
with pytest.raises(PermissionError):
97113
a.update(foo='quux')
98114

99-
def test_key_completions(self):
100-
a = self.init_attributes(dict())
115+
def test_key_completions(self, zarr_version):
116+
store = _init_store(zarr_version)
117+
a = self.init_attributes(store)
101118
d = a._ipython_key_completions_()
102119
assert 'foo' not in d
103120
assert '123' not in d
@@ -112,14 +129,17 @@ def test_key_completions(self):
112129
assert 'asdf;' in d
113130
assert 'baz' not in d
114131

115-
def test_caching_on(self):
132+
def test_caching_on(self, zarr_version):
116133
# caching is turned on by default
117134

118135
# setup store
119-
store = CountingDict()
136+
store = CountingDict() if zarr_version == 2 else CountingDictV3()
120137
assert 0 == store.counter['__getitem__', 'attrs']
121138
assert 0 == store.counter['__setitem__', 'attrs']
122-
store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii')
139+
if zarr_version == 2:
140+
store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii')
141+
else:
142+
store['attrs'] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii')
123143
assert 0 == store.counter['__getitem__', 'attrs']
124144
assert 1 == store.counter['__setitem__', 'attrs']
125145

@@ -136,54 +156,65 @@ def test_caching_on(self):
136156

137157
# test __setitem__ updates the cache
138158
a['foo'] = 'yyy'
139-
assert 2 == store.counter['__getitem__', 'attrs']
159+
get_cnt = 2 if zarr_version == 2 else 3
160+
assert get_cnt == store.counter['__getitem__', 'attrs']
140161
assert 2 == store.counter['__setitem__', 'attrs']
141162
assert a['foo'] == 'yyy'
142-
assert 2 == store.counter['__getitem__', 'attrs']
163+
assert get_cnt == store.counter['__getitem__', 'attrs']
143164
assert 2 == store.counter['__setitem__', 'attrs']
144165

145166
# test update() updates the cache
146167
a.update(foo='zzz', bar=84)
147-
assert 3 == store.counter['__getitem__', 'attrs']
168+
get_cnt = 3 if zarr_version == 2 else 5
169+
assert get_cnt == store.counter['__getitem__', 'attrs']
148170
assert 3 == store.counter['__setitem__', 'attrs']
149171
assert a['foo'] == 'zzz'
150172
assert a['bar'] == 84
151-
assert 3 == store.counter['__getitem__', 'attrs']
173+
assert get_cnt == store.counter['__getitem__', 'attrs']
152174
assert 3 == store.counter['__setitem__', 'attrs']
153175

154176
# test __contains__ uses the cache
155177
assert 'foo' in a
156-
assert 3 == store.counter['__getitem__', 'attrs']
178+
assert get_cnt == store.counter['__getitem__', 'attrs']
157179
assert 3 == store.counter['__setitem__', 'attrs']
158180
assert 'spam' not in a
159-
assert 3 == store.counter['__getitem__', 'attrs']
181+
assert get_cnt == store.counter['__getitem__', 'attrs']
160182
assert 3 == store.counter['__setitem__', 'attrs']
161183

162184
# test __delitem__ updates the cache
163185
del a['bar']
164-
assert 4 == store.counter['__getitem__', 'attrs']
186+
get_cnt = 4 if zarr_version == 2 else 7
187+
assert get_cnt == store.counter['__getitem__', 'attrs']
165188
assert 4 == store.counter['__setitem__', 'attrs']
166189
assert 'bar' not in a
167-
assert 4 == store.counter['__getitem__', 'attrs']
190+
assert get_cnt == store.counter['__getitem__', 'attrs']
168191
assert 4 == store.counter['__setitem__', 'attrs']
169192

170193
# test refresh()
171-
store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii')
172-
assert 4 == store.counter['__getitem__', 'attrs']
194+
if zarr_version == 2:
195+
store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii')
196+
else:
197+
store['attrs'] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii')
198+
assert get_cnt == store.counter['__getitem__', 'attrs']
173199
a.refresh()
174-
assert 5 == store.counter['__getitem__', 'attrs']
200+
get_cnt = 5 if zarr_version == 2 else 8
201+
assert get_cnt == store.counter['__getitem__', 'attrs']
175202
assert a['foo'] == 'xxx'
176-
assert 5 == store.counter['__getitem__', 'attrs']
203+
assert get_cnt == store.counter['__getitem__', 'attrs']
177204
assert a['bar'] == 42
178-
assert 5 == store.counter['__getitem__', 'attrs']
205+
assert get_cnt == store.counter['__getitem__', 'attrs']
179206

180-
def test_caching_off(self):
207+
def test_caching_off(self, zarr_version):
181208

182209
# setup store
183-
store = CountingDict()
210+
store = CountingDict() if zarr_version == 2 else CountingDictV3()
184211
assert 0 == store.counter['__getitem__', 'attrs']
185212
assert 0 == store.counter['__setitem__', 'attrs']
186-
store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii')
213+
214+
if zarr_version == 2:
215+
store['attrs'] = json.dumps(dict(foo='xxx', bar=42)).encode('ascii')
216+
else:
217+
store['attrs'] = json.dumps(dict(attributes=dict(foo='xxx', bar=42))).encode('ascii')
187218
assert 0 == store.counter['__getitem__', 'attrs']
188219
assert 1 == store.counter['__setitem__', 'attrs']
189220

@@ -200,25 +231,31 @@ def test_caching_off(self):
200231

201232
# test __setitem__
202233
a['foo'] = 'yyy'
203-
assert 4 == store.counter['__getitem__', 'attrs']
234+
get_cnt = 4 if zarr_version == 2 else 5
235+
assert get_cnt == store.counter['__getitem__', 'attrs']
204236
assert 2 == store.counter['__setitem__', 'attrs']
205237
assert a['foo'] == 'yyy'
206-
assert 5 == store.counter['__getitem__', 'attrs']
238+
get_cnt = 5 if zarr_version == 2 else 6
239+
assert get_cnt == store.counter['__getitem__', 'attrs']
207240
assert 2 == store.counter['__setitem__', 'attrs']
208241

209242
# test update()
210243
a.update(foo='zzz', bar=84)
211-
assert 6 == store.counter['__getitem__', 'attrs']
244+
get_cnt = 6 if zarr_version == 2 else 8
245+
assert get_cnt == store.counter['__getitem__', 'attrs']
212246
assert 3 == store.counter['__setitem__', 'attrs']
213247
assert a['foo'] == 'zzz'
214248
assert a['bar'] == 84
215-
assert 8 == store.counter['__getitem__', 'attrs']
249+
get_cnt = 8 if zarr_version == 2 else 10
250+
assert get_cnt == store.counter['__getitem__', 'attrs']
216251
assert 3 == store.counter['__setitem__', 'attrs']
217252

218253
# test __contains__
219254
assert 'foo' in a
220-
assert 9 == store.counter['__getitem__', 'attrs']
255+
get_cnt = 9 if zarr_version == 2 else 11
256+
assert get_cnt == store.counter['__getitem__', 'attrs']
221257
assert 3 == store.counter['__setitem__', 'attrs']
222258
assert 'spam' not in a
223-
assert 10 == store.counter['__getitem__', 'attrs']
259+
get_cnt = 10 if zarr_version == 2 else 12
260+
assert get_cnt == store.counter['__getitem__', 'attrs']
224261
assert 3 == store.counter['__setitem__', 'attrs']

0 commit comments

Comments
 (0)