Skip to content

Commit 13a26f8

Browse files
committed
Merge branch 'main' into docs/3.0-async-guide
1 parent 77da71f commit 13a26f8

16 files changed

+275
-208
lines changed

.github/workflows/test.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ jobs:
9999
runs-on: ubuntu-latest
100100
steps:
101101
- uses: actions/checkout@v4
102+
with:
103+
fetch-depth: 0 # required for hatch version discovery, which is needed for numcodecs.zarr3
102104
- name: Set up Python
103105
uses: actions/setup-python@v5
104106
with:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ docs/_build/
5454
docs/_autoapi
5555
docs/data
5656
data
57+
data.zip
5758

5859
# PyBuilder
5960
target/

docs/user-guide/arrays.rst

Lines changed: 98 additions & 102 deletions
Large diffs are not rendered by default.

docs/user-guide/attributes.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ Zarr arrays and groups support custom key/value attributes, which can be useful
77
storing application-specific metadata. For example::
88

99
>>> import zarr
10-
>>> # TODO: replace with create_group after #2463
11-
>>> root = zarr.group()
10+
>>> store = zarr.storage.MemoryStore()
11+
>>> root = zarr.create_group(store=store)
1212
>>> root.attrs['foo'] = 'bar'
13-
>>> z = root.zeros(name='zzz', shape=(10000, 10000))
13+
>>> z = root.create_array(name='zzz', shape=(10000, 10000), dtype='int32')
1414
>>> z.attrs['baz'] = 42
1515
>>> z.attrs['qux'] = [1, 4, 7, 12]
1616
>>> sorted(root.attrs)

docs/user-guide/config.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ Configuration values can be set using code like the following::
1010

1111
>>> import zarr
1212
>>>
13-
>>> zarr.config.set({"array.order": "F"})
13+
>>> zarr.config.set({'array.order': 'F'})
1414
<donfig.config_obj.ConfigSet object at ...>
1515
>>>
1616
>>> # revert this change so it doesn't impact the rest of the docs
17-
>>> zarr.config.set({"array.order": "C"})
17+
>>> zarr.config.set({'array.order': 'C'})
1818
<donfig.config_obj.ConfigSet object at ...>
1919

2020
Alternatively, configuration values can be set using environment variables, e.g.
@@ -35,8 +35,8 @@ Configuration options include the following:
3535

3636
For selecting custom implementations of codecs, pipelines, buffers and ndbuffers,
3737
first register the implementations in the registry and then select them in the config.
38-
For example, an implementation of the bytes codec in a class "custompackage.NewBytesCodec",
39-
requires the value of ``codecs.bytes.name`` to be "custompackage.NewBytesCodec".
38+
For example, an implementation of the bytes codec in a class ``'custompackage.NewBytesCodec'``,
39+
requires the value of ``codecs.bytes.name`` to be ``'custompackage.NewBytesCodec'``.
4040

4141
This is the current default configuration::
4242

docs/user-guide/consolidated_metadata.rst

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
.. only:: doctest
2+
3+
>>> from pprint import pprint
4+
15
.. _user-guide-consolidated-metadata:
26

37
Consolidated metadata
@@ -29,13 +33,12 @@ attribute of the ``GroupMetadata`` object.
2933
>>> import zarr
3034
>>>
3135
>>> store = zarr.storage.MemoryStore()
32-
>>> # TODO: replace with create_group after #2463
33-
>>> group = zarr.open_group(store=store)
34-
>>> group.create_array(shape=(1,), name="a")
36+
>>> group = zarr.create_group(store=store)
37+
>>> group.create_array(shape=(1,), name='a', dtype='float64')
3538
<Array memory://.../a shape=(1,) dtype=float64>
36-
>>> group.create_array(shape=(2, 2), name="b")
39+
>>> group.create_array(shape=(2, 2), name='b', dtype='float64')
3740
<Array memory://.../b shape=(2, 2) dtype=float64>
38-
>>> group.create_array(shape=(3, 3, 3), name="c")
41+
>>> group.create_array(shape=(3, 3, 3), name='c', dtype='float64')
3942
<Array memory://.../c shape=(3, 3, 3) dtype=float64>
4043
>>> zarr.consolidate_metadata(store)
4144
<Group memory://...>
@@ -45,21 +48,59 @@ that can be used.:
4548

4649
>>> consolidated = zarr.open_group(store=store)
4750
>>> consolidated_metadata = consolidated.metadata.consolidated_metadata.metadata
48-
>>> dict(sorted(consolidated_metadata.items()))
49-
{}
51+
>>> pprint(dict(sorted(consolidated_metadata.items())))
52+
{'a': ArrayV3Metadata(shape=(1,),
53+
data_type=<DataType.float64: 'float64'>,
54+
chunk_grid=RegularChunkGrid(chunk_shape=(1,)),
55+
chunk_key_encoding=DefaultChunkKeyEncoding(name='default',
56+
separator='/'),
57+
fill_value=np.float64(0.0),
58+
codecs=[BytesCodec(endian=<Endian.little: 'little'>),
59+
ZstdCodec(level=0, checksum=False)],
60+
attributes={},
61+
dimension_names=None,
62+
zarr_format=3,
63+
node_type='array',
64+
storage_transformers=()),
65+
'b': ArrayV3Metadata(shape=(2, 2),
66+
data_type=<DataType.float64: 'float64'>,
67+
chunk_grid=RegularChunkGrid(chunk_shape=(2, 2)),
68+
chunk_key_encoding=DefaultChunkKeyEncoding(name='default',
69+
separator='/'),
70+
fill_value=np.float64(0.0),
71+
codecs=[BytesCodec(endian=<Endian.little: 'little'>),
72+
ZstdCodec(level=0, checksum=False)],
73+
attributes={},
74+
dimension_names=None,
75+
zarr_format=3,
76+
node_type='array',
77+
storage_transformers=()),
78+
'c': ArrayV3Metadata(shape=(3, 3, 3),
79+
data_type=<DataType.float64: 'float64'>,
80+
chunk_grid=RegularChunkGrid(chunk_shape=(3, 3, 3)),
81+
chunk_key_encoding=DefaultChunkKeyEncoding(name='default',
82+
separator='/'),
83+
fill_value=np.float64(0.0),
84+
codecs=[BytesCodec(endian=<Endian.little: 'little'>),
85+
ZstdCodec(level=0, checksum=False)],
86+
attributes={},
87+
dimension_names=None,
88+
zarr_format=3,
89+
node_type='array',
90+
storage_transformers=())}
5091

5192
Operations on the group to get children automatically use the consolidated metadata:
5293

53-
>>> consolidated["a"] # no read / HTTP request to the Store is required
94+
>>> consolidated['a'] # no read / HTTP request to the Store is required
5495
<Array memory://.../a shape=(1,) dtype=float64>
5596

5697
With nested groups, the consolidated metadata is available on the children, recursively:
5798

58-
>>> child = group.create_group("child", attributes={"kind": "child"})
59-
>>> grandchild = child.create_group("child", attributes={"kind": "grandchild"})
99+
>>> child = group.create_group('child', attributes={'kind': 'child'})
100+
>>> grandchild = child.create_group('child', attributes={'kind': 'grandchild'})
60101
>>> consolidated = zarr.consolidate_metadata(store)
61102
>>>
62-
>>> consolidated["child"].metadata.consolidated_metadata
103+
>>> consolidated['child'].metadata.consolidated_metadata
63104
ConsolidatedMetadata(metadata={'child': GroupMetadata(attributes={'kind': 'grandchild'}, zarr_format=3, consolidated_metadata=ConsolidatedMetadata(metadata={}, kind='inline', must_understand=False), node_type='group')}, kind='inline', must_understand=False)
64105

65106
Synchronization and Concurrency

docs/user-guide/groups.rst

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
.. only:: doctest
2+
3+
>>> import shutil
4+
>>> shutil.rmtree('data', ignore_errors=True)
5+
16
.. _user-guide-groups:
27

38
Working with groups
@@ -10,9 +15,8 @@ support a similar interface.
1015
To create a group, use the :func:`zarr.create_group` function::
1116

1217
>>> import zarr
13-
>>>
14-
>>> # TODO: replace with create_group after #2463
15-
>>> root = zarr.group()
18+
>>> store = zarr.storage.MemoryStore()
19+
>>> root = zarr.create_group(store=store)
1620
>>> root
1721
<Group memory://...>
1822

@@ -24,7 +28,7 @@ Groups have a similar API to the Group class from `h5py
2428

2529
Groups can also contain arrays, e.g.::
2630

27-
>>> z1 = bar.zeros(name='baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4')
31+
>>> z1 = bar.create_array(name='baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32')
2832
>>> z1
2933
<Array memory://.../foo/bar/baz shape=(10000, 10000) dtype=int32>
3034

@@ -59,7 +63,7 @@ sub-directories, e.g.::
5963
>>> root
6064
<Group file://data/group.zarr>
6165
>>>
62-
>>> z = root.zeros(name='foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='i4')
66+
>>> z = root.create_array(name='foo/bar/baz', shape=(10000, 10000), chunks=(1000, 1000), dtype='int32')
6367
>>> z
6468
<Array file://data/group.zarr/foo/bar/baz shape=(10000, 10000) dtype=int32>
6569

@@ -77,12 +81,12 @@ Array and group diagnostics
7781
Diagnostic information about arrays and groups is available via the ``info``
7882
property. E.g.::
7983

80-
>>> # TODO: replace with create_group after #2463
81-
>>> root = zarr.group()
84+
>>> store = zarr.storage.MemoryStore()
85+
>>> root = zarr.group(store=store)
8286
>>> foo = root.create_group('foo')
83-
>>> bar = foo.zeros(name='bar', shape=1000000, chunks=100000, dtype='i8')
87+
>>> bar = foo.create_array(name='bar', shape=1000000, chunks=100000, dtype='int64')
8488
>>> bar[:] = 42
85-
>>> baz = foo.zeros(name='baz', shape=(1000, 1000), chunks=(100, 100), dtype='f4')
89+
>>> baz = foo.create_array(name='baz', shape=(1000, 1000), chunks=(100, 100), dtype='float32')
8690
>>> baz[:] = 4.2
8791
>>> root.info
8892
Name :

docs/user-guide/index.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ Advanced Topics
2525
performance
2626
async
2727
consolidated_metadata
28-
extending
28+
extending

docs/user-guide/performance.rst

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
user-guide-performance
1+
.. only:: doctest
2+
3+
>>> import shutil
4+
>>> shutil.rmtree('data', ignore_errors=True)
5+
6+
.. _user-guide-performance:
27

38
Optimizing performance
49
======================
@@ -19,42 +24,41 @@ better performance, at least when using the Blosc compression library.
1924
The optimal chunk shape will depend on how you want to access the data. E.g.,
2025
for a 2-dimensional array, if you only ever take slices along the first
2126
dimension, then chunk across the second dimension. If you know you want to chunk
22-
across an entire dimension you can use ``None`` or ``-1`` within the ``chunks``
23-
argument, e.g.::
27+
across an entire dimension you can use the full size of that dimension within the
28+
``chunks`` argument, e.g.::
2429

2530
>>> import zarr
26-
>>>
27-
>>> z1 = zarr.zeros((10000, 10000), chunks=(100, None), dtype='i4')
31+
>>> z1 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(100, 10000), dtype='int32')
2832
>>> z1.chunks
2933
(100, 10000)
3034

3135
Alternatively, if you only ever take slices along the second dimension, then
3236
chunk across the first dimension, e.g.::
3337

34-
>>> z2 = zarr.zeros((10000, 10000), chunks=(None, 100), dtype='i4')
38+
>>> z2 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 100), dtype='int32')
3539
>>> z2.chunks
3640
(10000, 100)
3741

3842
If you require reasonable performance for both access patterns then you need to
3943
find a compromise, e.g.::
4044

41-
>>> z3 = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4')
45+
>>> z3 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(1000, 1000), dtype='int32')
4246
>>> z3.chunks
4347
(1000, 1000)
4448

4549
If you are feeling lazy, you can let Zarr guess a chunk shape for your data by
46-
providing ``chunks=True``, although please note that the algorithm for guessing
50+
providing ``chunks='auto'``, although please note that the algorithm for guessing
4751
a chunk shape is based on simple heuristics and may be far from optimal. E.g.::
4852

49-
>>> z4 = zarr.zeros((10000, 10000), chunks=True, dtype='i4')
53+
>>> z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32')
5054
>>> z4.chunks
5155
(625, 625)
5256

5357
If you know you are always going to be loading the entire array into memory, you
54-
can turn off chunks by providing ``chunks=False``, in which case there will be
55-
one single chunk for the array::
58+
can turn off chunks by providing ``chunks`` equal to ``shape``, in which case there
59+
will be one single chunk for the array::
5660

57-
>>> z5 = zarr.zeros((10000, 10000), chunks=False, dtype='i4')
61+
>>> z5 = zarr.create_array(store={}, shape=(10000, 10000), chunks=(10000, 10000), dtype='int32')
5862
>>> z5.chunks
5963
(10000, 10000)
6064

@@ -70,9 +74,9 @@ ratios, depending on the correlation structure within the data. E.g.::
7074

7175
>>> import numpy as np
7276
>>>
73-
>>> a = np.arange(100000000, dtype='i4').reshape(10000, 10000).T
74-
>>> # TODO: replace with create_array after #2463
75-
>>> c = zarr.array(a, chunks=(1000, 1000))
77+
>>> a = np.arange(100000000, dtype='int32').reshape(10000, 10000).T
78+
>>> c = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype, config={'order': 'C'})
79+
>>> c[:] = a
7680
>>> c.info_complete()
7781
Type : Array
7882
Zarr format : 3
@@ -88,7 +92,8 @@ ratios, depending on the correlation structure within the data. E.g.::
8892
Storage ratio : 1.2
8993
Chunks Initialized : 100
9094
>>> with zarr.config.set({'array.order': 'F'}):
91-
... f = zarr.array(a, chunks=(1000, 1000))
95+
... f = zarr.create_array(store={}, shape=a.shape, chunks=(1000, 1000), dtype=a.dtype)
96+
... f[:] = a
9297
>>> f.info_complete()
9398
Type : Array
9499
Zarr format : 3
@@ -143,15 +148,14 @@ the time required to write an array with different values.::
143148
... shape = (chunks[0] * 1024,)
144149
... data = np.random.randint(0, 255, shape)
145150
... dtype = 'uint8'
146-
... with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}):
147-
... arr = zarr.open(
148-
... f"data/example-{write_empty_chunks}.zarr",
149-
... shape=shape,
150-
... chunks=chunks,
151-
... dtype=dtype,
152-
... fill_value=0,
153-
... mode='w'
154-
... )
151+
... arr = zarr.create_array(
152+
... f'data/example-{write_empty_chunks}.zarr',
153+
... shape=shape,
154+
... chunks=chunks,
155+
... dtype=dtype,
156+
... fill_value=0,
157+
... config={'write_empty_chunks': write_empty_chunks}
158+
... )
155159
... # initialize all chunks
156160
... arr[:] = 100
157161
... result = []
@@ -208,9 +212,9 @@ to re-open any underlying files or databases upon being unpickled.
208212
E.g., pickle/unpickle a local store array::
209213

210214
>>> import pickle
211-
>>>
212-
>>> # TODO: replace with create_array after #2463
213-
>>> z1 = zarr.array(store="data/example-2", data=np.arange(100000))
215+
>>> data = np.arange(100000)
216+
>>> z1 = zarr.create_array(store='data/example-2.zarr', shape=data.shape, chunks=data.shape, dtype=data.dtype)
217+
>>> z1[:] = data
214218
>>> s = pickle.dumps(z1)
215219
>>> z2 = pickle.loads(s)
216220
>>> z1 == z2

0 commit comments

Comments
 (0)