From 9686d3f58345f3ad50e34c9bafd15f8ad3025333 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Wed, 8 May 2024 15:13:50 -0400 Subject: [PATCH 1/8] Use donfig for sync configuration --- pyproject.toml | 1 + src/zarr/__init__.py | 2 +- src/zarr/config.py | 10 +++++----- src/zarr/group.py | 3 +-- src/zarr/sync.py | 8 +++----- tests/v3/test_sync.py | 2 -- 6 files changed, 11 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 93888a205c..3f5450845f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ dependencies = [ 'crc32c', 'zstandard', 'typing_extensions', + 'donfig' ] dynamic = [ "version", diff --git a/src/zarr/__init__.py b/src/zarr/__init__.py index 9ae9dc54c4..12c054de75 100644 --- a/src/zarr/__init__.py +++ b/src/zarr/__init__.py @@ -5,7 +5,7 @@ import zarr.codecs # noqa: F401 from zarr.array import Array, AsyncArray # noqa: F401 from zarr.array_v2 import ArrayV2 -from zarr.config import RuntimeConfiguration # noqa: F401 +from zarr.config import config, RuntimeConfiguration # noqa: F401 from zarr.group import AsyncGroup, Group # noqa: F401 from zarr.metadata import runtime_configuration # noqa: F401 from zarr.store import ( # noqa: F401 diff --git a/src/zarr/config.py b/src/zarr/config.py index cd4d82597b..904ff7cd2c 100644 --- a/src/zarr/config.py +++ b/src/zarr/config.py @@ -4,12 +4,12 @@ from dataclasses import dataclass from typing import Any, Literal, Optional +from donfig import Config -@dataclass(frozen=True) -class SyncConfiguration: - concurrency: Optional[int] = None - asyncio_loop: Optional[AbstractEventLoop] = None - timeout: float | None = None +config = Config( + "zarr", + defaults=[{"async": {"concurrency": None, "timeout": None}}], +) def parse_indexing_order(data: Any) -> Literal["C", "F"]: diff --git a/src/zarr/group.py b/src/zarr/group.py index 4da059c814..3331c2ddfe 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -18,7 +18,7 @@ from zarr.array import AsyncArray, Array from zarr.attributes import Attributes from zarr.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON -from zarr.config import RuntimeConfiguration, SyncConfiguration +from zarr.config import RuntimeConfiguration from zarr.store import StoreLike, StorePath, make_store_path from zarr.sync import SyncMixin, sync @@ -402,7 +402,6 @@ async def move(self, source: str, dest: str) -> None: @dataclass(frozen=True) class Group(SyncMixin): _async_group: AsyncGroup - _sync_configuration: SyncConfiguration = field(init=True, default=SyncConfiguration()) @classmethod def create( diff --git a/src/zarr/sync.py b/src/zarr/sync.py index a152030e89..6c76064b9a 100644 --- a/src/zarr/sync.py +++ b/src/zarr/sync.py @@ -10,7 +10,7 @@ from typing_extensions import ParamSpec -from zarr.config import SyncConfiguration +from zarr.config import config P = ParamSpec("P") T = TypeVar("T") @@ -113,15 +113,13 @@ def _get_loop() -> asyncio.AbstractEventLoop: class SyncMixin: - _sync_configuration: SyncConfiguration - def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: # TODO: refactor this to to take *args and **kwargs and pass those to the method # this should allow us to better type the sync wrapper return sync( coroutine, - loop=self._sync_configuration.asyncio_loop, - timeout=self._sync_configuration.timeout, + loop=_get_loop(), + timeout=config.get("async.timeout"), ) def _sync_iter(self, async_iterator: AsyncIterator[T]) -> list[T]: diff --git a/tests/v3/test_sync.py b/tests/v3/test_sync.py index 8f644745d2..ba262f521d 100644 --- a/tests/v3/test_sync.py +++ b/tests/v3/test_sync.py @@ -4,7 +4,6 @@ from unittest.mock import patch, AsyncMock from zarr.sync import sync, _get_loop, _get_lock, SyncError, SyncMixin -from zarr.config import SyncConfiguration import pytest @@ -113,7 +112,6 @@ async def bar(self) -> AsyncGenerator: class SyncFoo(SyncMixin): def __init__(self, async_foo: AsyncFoo) -> None: self._async_foo = async_foo - self._sync_configuration = SyncConfiguration(asyncio_loop=sync_loop) def foo(self) -> str: return self._sync(self._async_foo.foo()) From 4fe98669f3c9c56abe83aa28d2b82cfc0f7c57c7 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Thu, 9 May 2024 15:16:59 -0400 Subject: [PATCH 2/8] Consolidate concurrency config --- src/zarr/array.py | 8 ++++---- src/zarr/codecs/sharding.py | 9 +++++---- src/zarr/config.py | 10 ---------- src/zarr/metadata.py | 6 ++---- 4 files changed, 11 insertions(+), 22 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index 18e26b64dd..aca712f3e3 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -29,7 +29,7 @@ SliceSelection, concurrent_map, ) -from zarr.config import RuntimeConfiguration +from zarr.config import RuntimeConfiguration, config from zarr.indexing import BasicIndexer, all_chunk_coords, is_total_slice from zarr.chunk_grids import RegularChunkGrid @@ -214,7 +214,7 @@ async def getitem(self, selection: Selection) -> np.ndarray: for chunk_coords, chunk_selection, out_selection in indexer ], self._read_chunk, - self.runtime_configuration.concurrency, + config.get("async.concurrency"), ) if out.shape: @@ -291,7 +291,7 @@ async def setitem(self, selection: Selection, value: np.ndarray) -> None: for chunk_coords, chunk_selection, out_selection in indexer ], self._write_chunk, - self.runtime_configuration.concurrency, + config.get("async.concurrency"), ) async def _write_chunk( @@ -388,7 +388,7 @@ async def _delete_key(key: str) -> None: for chunk_coords in old_chunk_coords.difference(new_chunk_coords) ], _delete_key, - self.runtime_configuration.concurrency, + config.get("async.concurrency"), ) # Write new metadata diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index d4f8b7dfc9..576b651880 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -25,6 +25,7 @@ parse_shapelike, product, ) +from zarr.config import config from zarr.chunk_grids import RegularChunkGrid from zarr.indexing import ( BasicIndexer, @@ -342,7 +343,7 @@ async def decode( for chunk_coords, chunk_selection, out_selection in indexer ], self._read_chunk, - runtime_configuration.concurrency, + config.get("async.concurrency"), ) return out @@ -410,7 +411,7 @@ async def decode_partial( for chunk_coords, chunk_selection, out_selection in indexed_chunks ], self._read_chunk, - runtime_configuration.concurrency, + config.get("async.concurrency"), ) return out @@ -483,7 +484,7 @@ async def _write_chunk( for chunk_coords, chunk_selection, out_selection in indexer ], _write_chunk, - runtime_configuration.concurrency, + config.get("async.concurrency"), ) if len(encoded_chunks) == 0: return None @@ -567,7 +568,7 @@ async def _write_chunk( for chunk_coords, chunk_selection, out_selection in indexer ], _write_chunk, - runtime_configuration.concurrency, + config.get("async.concurrency"), ) for chunk_coords, chunk_bytes in encoded_chunks: diff --git a/src/zarr/config.py b/src/zarr/config.py index 904ff7cd2c..1519fe7b9f 100644 --- a/src/zarr/config.py +++ b/src/zarr/config.py @@ -19,13 +19,6 @@ def parse_indexing_order(data: Any) -> Literal["C", "F"]: raise ValueError(msg) -# todo: handle negative values? -def parse_concurrency(data: Any) -> int | None: - if data is None or isinstance(data, int): - return data - raise TypeError(f"Expected int or None, got {type(data)}") - - def parse_asyncio_loop(data: Any) -> AbstractEventLoop | None: if data is None or isinstance(data, AbstractEventLoop): return data @@ -35,7 +28,6 @@ def parse_asyncio_loop(data: Any) -> AbstractEventLoop | None: @dataclass(frozen=True) class RuntimeConfiguration: order: Literal["C", "F"] = "C" - concurrency: Optional[int] = None asyncio_loop: Optional[AbstractEventLoop] = None def __init__( @@ -45,9 +37,7 @@ def __init__( asyncio_loop: Optional[AbstractEventLoop] = None, ): order_parsed = parse_indexing_order(order) - concurrency_parsed = parse_concurrency(concurrency) asyncio_loop_parsed = parse_asyncio_loop(asyncio_loop) object.__setattr__(self, "order", order_parsed) - object.__setattr__(self, "concurrency", concurrency_parsed) object.__setattr__(self, "asyncio_loop_parsed", asyncio_loop_parsed) diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index 8eba9a0b5a..719d22a41c 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -29,10 +29,8 @@ from zarr.config import RuntimeConfiguration, parse_indexing_order -def runtime_configuration( - order: Literal["C", "F"], concurrency: Optional[int] = None -) -> RuntimeConfiguration: - return RuntimeConfiguration(order=order, concurrency=concurrency) +def runtime_configuration(order: Literal["C", "F"]) -> RuntimeConfiguration: + return RuntimeConfiguration(order=order) # For type checking From 923bee4e74fcedb343edc8394778d35811386891 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Thu, 9 May 2024 15:19:51 -0400 Subject: [PATCH 3/8] Remove unused parameter --- src/zarr/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zarr/config.py b/src/zarr/config.py index 1519fe7b9f..5e19ade1ad 100644 --- a/src/zarr/config.py +++ b/src/zarr/config.py @@ -33,7 +33,6 @@ class RuntimeConfiguration: def __init__( self, order: Literal["C", "F"] = "C", - concurrency: Optional[int] = None, asyncio_loop: Optional[AbstractEventLoop] = None, ): order_parsed = parse_indexing_order(order) From e03eed545ff3bfe33ba2c8f133803e7b9e840980 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 9 May 2024 15:22:13 -0700 Subject: [PATCH 4/8] finish removing runtime config -- a few todos remain --- src/zarr/__init__.py | 12 +-- src/zarr/abc/codec.py | 9 --- src/zarr/array.py | 64 ++++----------- src/zarr/array_v2.py | 25 ++---- src/zarr/codecs/blosc.py | 3 - src/zarr/codecs/bytes.py | 3 - src/zarr/codecs/crc32c_.py | 3 - src/zarr/codecs/gzip.py | 3 - src/zarr/codecs/pipeline.py | 29 ++----- src/zarr/codecs/sharding.py | 25 ++---- src/zarr/codecs/transpose.py | 4 +- src/zarr/codecs/zstd.py | 3 - src/zarr/common.py | 26 +++++- src/zarr/config.py | 27 +------ src/zarr/group.py | 33 ++------ src/zarr/metadata.py | 14 ++-- src/zarr/sync.py | 1 - tests/v3/package_with_entrypoint/__init__.py | 3 - tests/v3/test_codecs.py | 85 ++++++++++---------- tests/v3/test_group.py | 3 - 20 files changed, 120 insertions(+), 255 deletions(-) diff --git a/src/zarr/__init__.py b/src/zarr/__init__.py index 12c054de75..a8aff30b52 100644 --- a/src/zarr/__init__.py +++ b/src/zarr/__init__.py @@ -5,9 +5,8 @@ import zarr.codecs # noqa: F401 from zarr.array import Array, AsyncArray # noqa: F401 from zarr.array_v2 import ArrayV2 -from zarr.config import config, RuntimeConfiguration # noqa: F401 +from zarr.config import config # noqa: F401 from zarr.group import AsyncGroup, Group # noqa: F401 -from zarr.metadata import runtime_configuration # noqa: F401 from zarr.store import ( # noqa: F401 StoreLike, make_store_path, @@ -21,22 +20,19 @@ async def open_auto_async( store: StoreLike, - runtime_configuration_: RuntimeConfiguration = RuntimeConfiguration(), ) -> Union[AsyncArray, AsyncGroup]: store_path = make_store_path(store) try: - return await AsyncArray.open(store_path, runtime_configuration=runtime_configuration_) + return await AsyncArray.open(store_path) except KeyError: - return await AsyncGroup.open(store_path, runtime_configuration=runtime_configuration_) + return await AsyncGroup.open(store_path) def open_auto( store: StoreLike, - runtime_configuration_: RuntimeConfiguration = RuntimeConfiguration(), ) -> Union[Array, ArrayV2, Group]: object = _sync( - open_auto_async(store, runtime_configuration_), - runtime_configuration_.asyncio_loop, + open_auto_async(store), ) if isinstance(object, AsyncArray): return Array(object) diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py index 1abc21b30b..8897cced89 100644 --- a/src/zarr/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -14,7 +14,6 @@ from typing_extensions import Self from zarr.common import BytesLike, SliceSelection from zarr.metadata import ArrayMetadata - from zarr.config import RuntimeConfiguration class Codec(Metadata): @@ -40,7 +39,6 @@ async def decode( self, chunk_array: np.ndarray, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: pass @@ -49,7 +47,6 @@ async def encode( self, chunk_array: np.ndarray, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> Optional[np.ndarray]: pass @@ -60,7 +57,6 @@ async def decode( self, chunk_array: BytesLike, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: pass @@ -69,7 +65,6 @@ async def encode( self, chunk_array: np.ndarray, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> Optional[BytesLike]: pass @@ -81,7 +76,6 @@ async def decode_partial( store_path: StorePath, selection: SliceSelection, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> Optional[np.ndarray]: pass @@ -94,7 +88,6 @@ async def encode_partial( chunk_array: np.ndarray, selection: SliceSelection, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> None: pass @@ -105,7 +98,6 @@ async def decode( self, chunk_array: BytesLike, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> BytesLike: pass @@ -114,6 +106,5 @@ async def encode( self, chunk_array: BytesLike, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> Optional[BytesLike]: pass diff --git a/src/zarr/array.py b/src/zarr/array.py index aca712f3e3..b84f7eca6a 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -6,7 +6,6 @@ # Questions to consider: # 1. Was splitting the array into two classes really necessary? -# 2. Do we really need runtime_configuration? Specifically, the asyncio_loop seems problematic from __future__ import annotations @@ -29,7 +28,7 @@ SliceSelection, concurrent_map, ) -from zarr.config import RuntimeConfiguration, config +from zarr.config import config from zarr.indexing import BasicIndexer, all_chunk_coords, is_total_slice from zarr.chunk_grids import RegularChunkGrid @@ -52,7 +51,7 @@ def parse_array_metadata(data: Any) -> ArrayMetadata: class AsyncArray: metadata: ArrayMetadata store_path: StorePath - runtime_configuration: RuntimeConfiguration + _order: Literal["C", "F"] @property def codecs(self): @@ -62,13 +61,12 @@ def __init__( self, metadata: ArrayMetadata, store_path: StorePath, - runtime_configuration: RuntimeConfiguration, ): metadata_parsed = parse_array_metadata(metadata) object.__setattr__(self, "metadata", metadata_parsed) object.__setattr__(self, "store_path", store_path) - object.__setattr__(self, "runtime_configuration", runtime_configuration) + object.__setattr__(self, "_order", config.get("order", "C")) @classmethod async def create( @@ -86,7 +84,6 @@ async def create( codecs: Optional[Iterable[Union[Codec, Dict[str, Any]]]] = None, dimension_names: Optional[Iterable[str]] = None, attributes: Optional[Dict[str, Any]] = None, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), exists_ok: bool = False, ) -> AsyncArray: store_path = make_store_path(store) @@ -115,12 +112,10 @@ async def create( dimension_names=tuple(dimension_names) if dimension_names else None, attributes=attributes or {}, ) - runtime_configuration = runtime_configuration or RuntimeConfiguration() array = cls( metadata=metadata, store_path=store_path, - runtime_configuration=runtime_configuration, ) await array._save_metadata() @@ -131,19 +126,15 @@ def from_dict( cls, store_path: StorePath, data: Dict[str, Any], - runtime_configuration: RuntimeConfiguration, ) -> AsyncArray: metadata = ArrayMetadata.from_dict(data) - async_array = cls( - metadata=metadata, store_path=store_path, runtime_configuration=runtime_configuration - ) + async_array = cls(metadata=metadata, store_path=store_path) return async_array @classmethod async def open( cls, store: StoreLike, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> AsyncArray: store_path = make_store_path(store) zarr_json_bytes = await (store_path / ZARR_JSON).get() @@ -151,14 +142,12 @@ async def open( return cls.from_dict( store_path, json.loads(zarr_json_bytes), - runtime_configuration=runtime_configuration, ) @classmethod async def open_auto( cls, store: StoreLike, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> AsyncArray: # TODO: Union[AsyncArray, ArrayV2] store_path = make_store_path(store) v3_metadata_bytes = await (store_path / ZARR_JSON).get() @@ -166,7 +155,6 @@ async def open_auto( return cls.from_dict( store_path, json.loads(v3_metadata_bytes), - runtime_configuration=runtime_configuration or RuntimeConfiguration(), ) else: raise ValueError("no v2 support yet") @@ -204,7 +192,7 @@ async def getitem(self, selection: Selection) -> np.ndarray: out = np.zeros( indexer.shape, dtype=self.metadata.dtype, - order=self.runtime_configuration.order, + order=self._order, ) # reading chunks and decoding them @@ -232,15 +220,13 @@ async def _read_chunk( out_selection: SliceSelection, out: np.ndarray, ) -> None: - chunk_spec = self.metadata.get_chunk_spec(chunk_coords) + chunk_spec = self.metadata.get_chunk_spec(chunk_coords, self._order) chunk_key_encoding = self.metadata.chunk_key_encoding chunk_key = chunk_key_encoding.encode_chunk_key(chunk_coords) store_path = self.store_path / chunk_key if self.codecs.supports_partial_decode: - chunk_array = await self.codecs.decode_partial( - store_path, chunk_selection, chunk_spec, self.runtime_configuration - ) + chunk_array = await self.codecs.decode_partial(store_path, chunk_selection, chunk_spec) if chunk_array is not None: out[out_selection] = chunk_array else: @@ -248,9 +234,7 @@ async def _read_chunk( else: chunk_bytes = await store_path.get() if chunk_bytes is not None: - chunk_array = await self.codecs.decode( - chunk_bytes, chunk_spec, self.runtime_configuration - ) + chunk_array = await self.codecs.decode(chunk_bytes, chunk_spec) tmp = chunk_array[chunk_selection] out[out_selection] = tmp else: @@ -302,7 +286,7 @@ async def _write_chunk( chunk_selection: SliceSelection, out_selection: SliceSelection, ) -> None: - chunk_spec = self.metadata.get_chunk_spec(chunk_coords) + chunk_spec = self.metadata.get_chunk_spec(chunk_coords, self._order) chunk_key_encoding = self.metadata.chunk_key_encoding chunk_key = chunk_key_encoding.encode_chunk_key(chunk_coords) store_path = self.store_path / chunk_key @@ -326,7 +310,6 @@ async def _write_chunk( value[out_selection], chunk_selection, chunk_spec, - self.runtime_configuration, ) else: # writing partial chunks @@ -342,7 +325,7 @@ async def _write_chunk( chunk_array.fill(self.metadata.fill_value) else: chunk_array = ( - await self.codecs.decode(chunk_bytes, chunk_spec, self.runtime_configuration) + await self.codecs.decode(chunk_bytes, chunk_spec) ).copy() # make a writable copy chunk_array[chunk_selection] = value[out_selection] @@ -355,9 +338,7 @@ async def _write_chunk_to_store( # chunks that only contain fill_value will be removed await store_path.delete() else: - chunk_bytes = await self.codecs.encode( - chunk_array, chunk_spec, self.runtime_configuration - ) + chunk_bytes = await self.codecs.encode(chunk_array, chunk_spec) if chunk_bytes is None: await store_path.delete() else: @@ -429,7 +410,6 @@ def create( codecs: Optional[Iterable[Union[Codec, Dict[str, Any]]]] = None, dimension_names: Optional[Iterable[str]] = None, attributes: Optional[Dict[str, Any]] = None, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), exists_ok: bool = False, ) -> Array: async_array = sync( @@ -443,10 +423,8 @@ def create( codecs=codecs, dimension_names=dimension_names, attributes=attributes, - runtime_configuration=runtime_configuration, exists_ok=exists_ok, ), - runtime_configuration.asyncio_loop, ) return cls(async_array) @@ -455,34 +433,25 @@ def from_dict( cls, store_path: StorePath, data: Dict[str, Any], - runtime_configuration: RuntimeConfiguration, ) -> Array: - async_array = AsyncArray.from_dict( - store_path=store_path, data=data, runtime_configuration=runtime_configuration - ) + async_array = AsyncArray.from_dict(store_path=store_path, data=data) return cls(async_array) @classmethod def open( cls, store: StoreLike, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> Array: - async_array = sync( - AsyncArray.open(store, runtime_configuration=runtime_configuration), - runtime_configuration.asyncio_loop, - ) + async_array = sync(AsyncArray.open(store)) return cls(async_array) @classmethod def open_auto( cls, store: StoreLike, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> Array: # TODO: Union[Array, ArrayV2]: async_array = sync( - AsyncArray.open_auto(store, runtime_configuration), - runtime_configuration.asyncio_loop, + AsyncArray.open_auto(store), ) return cls(async_array) @@ -517,20 +486,17 @@ def store_path(self) -> StorePath: def __getitem__(self, selection: Selection) -> np.ndarray: return sync( self._async_array.getitem(selection), - self._async_array.runtime_configuration.asyncio_loop, ) def __setitem__(self, selection: Selection, value: np.ndarray) -> None: sync( self._async_array.setitem(selection, value), - self._async_array.runtime_configuration.asyncio_loop, ) def resize(self, new_shape: ChunkCoords) -> Array: return type(self)( sync( self._async_array.resize(new_shape), - self._async_array.runtime_configuration.asyncio_loop, ) ) @@ -538,7 +504,6 @@ def update_attributes(self, new_attributes: Dict[str, Any]) -> Array: return type(self)( sync( self._async_array.update_attributes(new_attributes), - self._async_array.runtime_configuration.asyncio_loop, ) ) @@ -548,5 +513,4 @@ def __repr__(self): def info(self): return sync( self._async_array.info(), - self._async_array.runtime_configuration.asyncio_loop, ) diff --git a/src/zarr/array_v2.py b/src/zarr/array_v2.py index 8c2cd3faec..18251e7db7 100644 --- a/src/zarr/array_v2.py +++ b/src/zarr/array_v2.py @@ -20,7 +20,6 @@ concurrent_map, to_thread, ) -from zarr.config import RuntimeConfiguration from zarr.indexing import BasicIndexer, all_chunk_coords, is_total_slice from zarr.metadata import ArrayV2Metadata from zarr.store import StoreLike, StorePath, make_store_path @@ -55,7 +54,6 @@ class ArrayV2: metadata: ArrayV2Metadata attributes: Optional[Dict[str, Any]] store_path: StorePath - runtime_configuration: RuntimeConfiguration @classmethod async def create_async( @@ -72,7 +70,6 @@ async def create_async( compressor: Optional[Dict[str, Any]] = None, attributes: Optional[Dict[str, Any]] = None, exists_ok: bool = False, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> ArrayV2: store_path = make_store_path(store) if not exists_ok: @@ -98,7 +95,6 @@ async def create_async( metadata=metadata, store_path=store_path, attributes=attributes, - runtime_configuration=runtime_configuration, ) await array._save_metadata() return array @@ -118,7 +114,6 @@ def create( compressor: Optional[Dict[str, Any]] = None, attributes: Optional[Dict[str, Any]] = None, exists_ok: bool = False, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> ArrayV2: return sync( cls.create_async( @@ -133,16 +128,13 @@ def create( filters=filters, attributes=attributes, exists_ok=exists_ok, - runtime_configuration=runtime_configuration, ), - runtime_configuration.asyncio_loop, ) @classmethod async def open_async( cls, store: StoreLike, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> ArrayV2: store_path = make_store_path(store) zarray_bytes, zattrs_bytes = await asyncio.gather( @@ -154,18 +146,15 @@ async def open_async( store_path, zarray_json=json.loads(zarray_bytes), zattrs_json=json.loads(zattrs_bytes) if zattrs_bytes is not None else None, - runtime_configuration=runtime_configuration, ) @classmethod def open( cls, store: StoreLike, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> ArrayV2: return sync( - cls.open_async(store, runtime_configuration), - runtime_configuration.asyncio_loop, + cls.open_async(store), ) @classmethod @@ -174,14 +163,12 @@ def from_dict( store_path: StorePath, zarray_json: Any, zattrs_json: Optional[Any], - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> ArrayV2: metadata = ArrayV2Metadata.from_dict(zarray_json) out = cls( store_path=store_path, metadata=metadata, attributes=zattrs_json, - runtime_configuration=runtime_configuration, ) out._validate_metadata() return out @@ -219,7 +206,7 @@ def async_(self) -> _AsyncArrayProxy: return _AsyncArrayProxy(self) def __getitem__(self, selection: Selection): - return sync(self.get_async(selection), self.runtime_configuration.asyncio_loop) + return sync(self.get_async(selection)) async def get_async(self, selection: Selection): indexer = BasicIndexer( @@ -295,7 +282,7 @@ async def _decode_chunk(self, chunk_bytes: Optional[BytesLike]) -> Optional[np.n return chunk_array def __setitem__(self, selection: Selection, value: np.ndarray) -> None: - sync(self.set_async(selection, value), self.runtime_configuration.asyncio_loop) + sync(self.set_async(selection, value)) async def set_async(self, selection: Selection, value: np.ndarray) -> None: chunk_shape = self.metadata.chunks @@ -436,7 +423,7 @@ async def _delete_key(key: str) -> None: return replace(self, metadata=new_metadata) def resize(self, new_shape: ChunkCoords) -> ArrayV2: - return sync(self.resize_async(new_shape), self.runtime_configuration.asyncio_loop) + return sync(self.resize_async(new_shape)) async def convert_to_v3_async(self) -> Array: from sys import byteorder as sys_byteorder @@ -511,7 +498,6 @@ async def convert_to_v3_async(self) -> Array: return Array.from_dict( store_path=self.store_path, data=json.loads(new_metadata_bytes), - runtime_configuration=self.runtime_configuration, ) async def update_attributes_async(self, new_attributes: Dict[str, Any]) -> ArrayV2: @@ -521,11 +507,10 @@ async def update_attributes_async(self, new_attributes: Dict[str, Any]) -> Array def update_attributes(self, new_attributes: Dict[str, Any]) -> ArrayV2: return sync( self.update_attributes_async(new_attributes), - self.runtime_configuration.asyncio_loop, ) def convert_to_v3(self) -> Array: - return sync(self.convert_to_v3_async(), loop=self.runtime_configuration.asyncio_loop) + return sync(self.convert_to_v3_async()) def __repr__(self): return f"" diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index 374375e6c2..5ee2b7640d 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -17,7 +17,6 @@ from typing import Dict, Optional from typing_extensions import Self from zarr.common import JSON, ArraySpec, BytesLike - from zarr.config import RuntimeConfiguration class BloscShuffle(Enum): @@ -163,7 +162,6 @@ async def decode( self, chunk_bytes: bytes, _chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> BytesLike: return await to_thread(self._blosc_codec.decode, chunk_bytes) @@ -171,7 +169,6 @@ async def encode( self, chunk_bytes: bytes, chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> Optional[BytesLike]: chunk_array = np.frombuffer(chunk_bytes, dtype=chunk_spec.dtype) return await to_thread(self._blosc_codec.encode, chunk_array) diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index aa24c3167e..566b3a8df9 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -13,7 +13,6 @@ if TYPE_CHECKING: from zarr.common import JSON, ArraySpec, BytesLike - from zarr.config import RuntimeConfiguration from typing_extensions import Self @@ -72,7 +71,6 @@ async def decode( self, chunk_bytes: BytesLike, chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: if chunk_spec.dtype.itemsize > 0: if self.endian == Endian.little: @@ -95,7 +93,6 @@ async def encode( self, chunk_array: np.ndarray, _chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> Optional[BytesLike]: if chunk_array.dtype.itemsize > 1: byteorder = self._get_byteorder(chunk_array) diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index 04d5b88d70..dd61b3425e 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -15,7 +15,6 @@ from typing import Dict, Optional from typing_extensions import Self from zarr.common import JSON, BytesLike, ArraySpec - from zarr.config import RuntimeConfiguration @dataclass(frozen=True) @@ -34,7 +33,6 @@ async def decode( self, chunk_bytes: bytes, _chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> BytesLike: crc32_bytes = chunk_bytes[-4:] inner_bytes = chunk_bytes[:-4] @@ -52,7 +50,6 @@ async def encode( self, chunk_bytes: bytes, _chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> Optional[BytesLike]: return chunk_bytes + np.uint32(crc32c(chunk_bytes)).tobytes() diff --git a/src/zarr/codecs/gzip.py b/src/zarr/codecs/gzip.py index f75f5b743e..71dcaa6bb5 100644 --- a/src/zarr/codecs/gzip.py +++ b/src/zarr/codecs/gzip.py @@ -12,7 +12,6 @@ from typing import Optional, Dict from typing_extensions import Self from zarr.common import JSON, ArraySpec, BytesLike - from zarr.config import RuntimeConfiguration def parse_gzip_level(data: JSON) -> int: @@ -48,7 +47,6 @@ async def decode( self, chunk_bytes: bytes, _chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> BytesLike: return await to_thread(GZip(self.level).decode, chunk_bytes) @@ -56,7 +54,6 @@ async def encode( self, chunk_bytes: bytes, _chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> Optional[BytesLike]: return await to_thread(GZip(self.level).encode, chunk_bytes) diff --git a/src/zarr/codecs/pipeline.py b/src/zarr/codecs/pipeline.py index 4908ee8057..c2338f717d 100644 --- a/src/zarr/codecs/pipeline.py +++ b/src/zarr/codecs/pipeline.py @@ -21,7 +21,6 @@ from typing import Iterator, List, Optional, Tuple, Union from zarr.store import StorePath from zarr.metadata import ArrayMetadata - from zarr.config import RuntimeConfiguration from zarr.common import JSON, ArraySpec, BytesLike, SliceSelection @@ -151,7 +150,6 @@ async def decode( self, chunk_bytes: BytesLike, array_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: ( aa_codecs_with_spec, @@ -160,13 +158,13 @@ async def decode( ) = self._codecs_with_resolved_metadata(array_spec) for bb_codec, array_spec in bb_codecs_with_spec[::-1]: - chunk_bytes = await bb_codec.decode(chunk_bytes, array_spec, runtime_configuration) + chunk_bytes = await bb_codec.decode(chunk_bytes, array_spec) ab_codec, array_spec = ab_codec_with_spec - chunk_array = await ab_codec.decode(chunk_bytes, array_spec, runtime_configuration) + chunk_array = await ab_codec.decode(chunk_bytes, array_spec) for aa_codec, array_spec in aa_codecs_with_spec[::-1]: - chunk_array = await aa_codec.decode(chunk_array, array_spec, runtime_configuration) + chunk_array = await aa_codec.decode(chunk_array, array_spec) return chunk_array @@ -175,19 +173,15 @@ async def decode_partial( store_path: StorePath, selection: SliceSelection, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> Optional[np.ndarray]: assert self.supports_partial_decode assert isinstance(self.array_bytes_codec, ArrayBytesCodecPartialDecodeMixin) - return await self.array_bytes_codec.decode_partial( - store_path, selection, chunk_spec, runtime_configuration - ) + return await self.array_bytes_codec.decode_partial(store_path, selection, chunk_spec) async def encode( self, chunk_array: np.ndarray, array_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> Optional[BytesLike]: ( aa_codecs_with_spec, @@ -196,23 +190,19 @@ async def encode( ) = self._codecs_with_resolved_metadata(array_spec) for aa_codec, array_spec in aa_codecs_with_spec: - chunk_array_maybe = await aa_codec.encode( - chunk_array, array_spec, runtime_configuration - ) + chunk_array_maybe = await aa_codec.encode(chunk_array, array_spec) if chunk_array_maybe is None: return None chunk_array = chunk_array_maybe ab_codec, array_spec = ab_codec_with_spec - chunk_bytes_maybe = await ab_codec.encode(chunk_array, array_spec, runtime_configuration) + chunk_bytes_maybe = await ab_codec.encode(chunk_array, array_spec) if chunk_bytes_maybe is None: return None chunk_bytes = chunk_bytes_maybe for bb_codec, array_spec in bb_codecs_with_spec: - chunk_bytes_maybe = await bb_codec.encode( - chunk_bytes, array_spec, runtime_configuration - ) + chunk_bytes_maybe = await bb_codec.encode(chunk_bytes, array_spec) if chunk_bytes_maybe is None: return None chunk_bytes = chunk_bytes_maybe @@ -225,13 +215,10 @@ async def encode_partial( chunk_array: np.ndarray, selection: SliceSelection, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> None: assert self.supports_partial_encode assert isinstance(self.array_bytes_codec, ArrayBytesCodecPartialEncodeMixin) - await self.array_bytes_codec.encode_partial( - store_path, chunk_array, selection, chunk_spec, runtime_configuration - ) + await self.array_bytes_codec.encode_partial(store_path, chunk_array, selection, chunk_spec) def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: for codec in self: diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 576b651880..29da3ac9cf 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -35,7 +35,6 @@ ) from zarr.metadata import ( ArrayMetadata, - runtime_configuration as make_runtime_configuration, parse_codecs, ) @@ -50,7 +49,6 @@ BytesLike, SliceSelection, ) - from zarr.config import RuntimeConfiguration MAX_UINT_64 = 2**64 - 1 @@ -303,7 +301,6 @@ async def decode( self, shard_bytes: BytesLike, shard_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: # print("decode") shard_shape = shard_spec.shape @@ -320,7 +317,7 @@ async def decode( out = np.zeros( shard_shape, dtype=shard_spec.dtype, - order=runtime_configuration.order, + order=shard_spec.order, ) shard_dict = await _ShardProxy.from_bytes(shard_bytes, self, chunks_per_shard) @@ -337,7 +334,6 @@ async def decode( chunk_selection, out_selection, shard_spec, - runtime_configuration, out, ) for chunk_coords, chunk_selection, out_selection in indexer @@ -353,7 +349,6 @@ async def decode_partial( store_path: StorePath, selection: SliceSelection, shard_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> Optional[np.ndarray]: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape @@ -369,7 +364,7 @@ async def decode_partial( out = np.zeros( indexer.shape, dtype=shard_spec.dtype, - order=runtime_configuration.order, + order=shard_spec.order, ) indexed_chunks = list(indexer) @@ -405,7 +400,6 @@ async def decode_partial( chunk_selection, out_selection, shard_spec, - runtime_configuration, out, ) for chunk_coords, chunk_selection, out_selection in indexed_chunks @@ -423,13 +417,12 @@ async def _read_chunk( chunk_selection: SliceSelection, out_selection: SliceSelection, shard_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, out: np.ndarray, ) -> None: chunk_spec = self._get_chunk_spec(shard_spec) chunk_bytes = shard_dict.get(chunk_coords, None) if chunk_bytes is not None: - chunk_array = await self.codecs.decode(chunk_bytes, chunk_spec, runtime_configuration) + chunk_array = await self.codecs.decode(chunk_bytes, chunk_spec) tmp = chunk_array[chunk_selection] out[out_selection] = tmp else: @@ -439,7 +432,6 @@ async def encode( self, shard_array: np.ndarray, shard_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> Optional[BytesLike]: shard_shape = shard_spec.shape chunk_shape = self.chunk_shape @@ -473,7 +465,7 @@ async def _write_chunk( chunk_spec = self._get_chunk_spec(shard_spec) return ( chunk_coords, - await self.codecs.encode(chunk_array, chunk_spec, runtime_configuration), + await self.codecs.encode(chunk_array, chunk_spec), ) return (chunk_coords, None) @@ -502,7 +494,6 @@ async def encode_partial( shard_array: np.ndarray, selection: SliceSelection, shard_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> None: # print("encode_partial") shard_shape = shard_spec.shape @@ -546,14 +537,14 @@ async def _write_chunk( chunk_array.fill(shard_spec.fill_value) else: chunk_array = ( - await self.codecs.decode(chunk_bytes, chunk_spec, runtime_configuration) + await self.codecs.decode(chunk_bytes, chunk_spec) ).copy() # make a writable copy chunk_array[chunk_selection] = shard_array[out_selection] if not np.array_equiv(chunk_array, shard_spec.fill_value): return ( chunk_coords, - await self.codecs.encode(chunk_array, chunk_spec, runtime_configuration), + await self.codecs.encode(chunk_array, chunk_spec), ) else: return (chunk_coords, None) @@ -608,7 +599,6 @@ async def _decode_shard_index( await self.index_codecs.decode( index_bytes, self._get_index_chunk_spec(chunks_per_shard), - make_runtime_configuration("C"), ) ) @@ -616,7 +606,6 @@ async def _encode_shard_index(self, index: _ShardIndex) -> BytesLike: index_bytes = await self.index_codecs.encode( index.offsets_and_lengths, self._get_index_chunk_spec(index.chunks_per_shard), - make_runtime_configuration("C"), ) assert index_bytes is not None return index_bytes @@ -632,6 +621,7 @@ def _get_index_chunk_spec(self, chunks_per_shard: ChunkCoords) -> ArraySpec: shape=chunks_per_shard + (2,), dtype=np.dtype(" ArraySpec: shape=self.chunk_shape, dtype=shard_spec.dtype, fill_value=shard_spec.fill_value, + order=shard_spec.order, ) @lru_cache diff --git a/src/zarr/codecs/transpose.py b/src/zarr/codecs/transpose.py index c63327f6fc..a13708955c 100644 --- a/src/zarr/codecs/transpose.py +++ b/src/zarr/codecs/transpose.py @@ -6,7 +6,6 @@ from zarr.common import JSON, ArraySpec, ChunkCoordsLike, parse_named_configuration if TYPE_CHECKING: - from zarr.config import RuntimeConfiguration from typing import TYPE_CHECKING, Optional, Tuple from typing_extensions import Self @@ -71,13 +70,13 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: shape=tuple(chunk_spec.shape[self.order[i]] for i in range(chunk_spec.ndim)), dtype=chunk_spec.dtype, fill_value=chunk_spec.fill_value, + order=chunk_spec.order, ) async def decode( self, chunk_array: np.ndarray, chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> np.ndarray: inverse_order = [0] * chunk_spec.ndim for x, i in enumerate(self.order): @@ -89,7 +88,6 @@ async def encode( self, chunk_array: np.ndarray, chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> Optional[np.ndarray]: chunk_array = chunk_array.transpose(self.order) return chunk_array diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py index 41db850ab6..ad10a7fdb8 100644 --- a/src/zarr/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -12,7 +12,6 @@ if TYPE_CHECKING: from typing import Dict, Optional from typing_extensions import Self - from zarr.config import RuntimeConfiguration from zarr.common import BytesLike, JSON, ArraySpec @@ -64,7 +63,6 @@ async def decode( self, chunk_bytes: bytes, _chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> BytesLike: return await to_thread(self._decompress, chunk_bytes) @@ -72,7 +70,6 @@ async def encode( self, chunk_bytes: bytes, _chunk_spec: ArraySpec, - _runtime_configuration: RuntimeConfiguration, ) -> Optional[BytesLike]: return await to_thread(self._compress, chunk_bytes) diff --git a/src/zarr/common.py b/src/zarr/common.py index 7d8431f97e..7ef2fc9a61 100644 --- a/src/zarr/common.py +++ b/src/zarr/common.py @@ -1,5 +1,16 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Union, Tuple, Iterable, Dict, List, TypeVar, overload, Any +from typing import ( + TYPE_CHECKING, + Literal, + Union, + Tuple, + Iterable, + Dict, + List, + TypeVar, + overload, + Any, +) import asyncio import contextvars from dataclasses import dataclass @@ -78,15 +89,20 @@ class ArraySpec: shape: ChunkCoords dtype: np.dtype[Any] fill_value: Any + order: Literal["C", "F"] - def __init__(self, shape: ChunkCoords, dtype: np.dtype[Any], fill_value: Any) -> None: + def __init__( + self, shape: ChunkCoords, dtype: np.dtype[Any], fill_value: Any, order: Literal["C", "F"] + ) -> None: shape_parsed = parse_shapelike(shape) dtype_parsed = parse_dtype(dtype) fill_value_parsed = parse_fill_value(fill_value) + order_parsed = parse_order(order) object.__setattr__(self, "shape", shape_parsed) object.__setattr__(self, "dtype", dtype_parsed) object.__setattr__(self, "fill_value", fill_value_parsed) + object.__setattr__(self, "order", order_parsed) @property def ndim(self) -> int: @@ -159,3 +175,9 @@ def parse_dtype(data: Any) -> np.dtype[Any]: def parse_fill_value(data: Any) -> Any: # todo: real validation return data + + +def parse_order(data: Any) -> Literal["C", "F"]: + if data in ("C", "F"): + return data + raise ValueError(f"Expected one of ('C', 'F'), got {data} instead.") diff --git a/src/zarr/config.py b/src/zarr/config.py index 5e19ade1ad..5a42676e05 100644 --- a/src/zarr/config.py +++ b/src/zarr/config.py @@ -1,8 +1,6 @@ from __future__ import annotations -from asyncio import AbstractEventLoop -from dataclasses import dataclass -from typing import Any, Literal, Optional +from typing import Any, Literal from donfig import Config @@ -17,26 +15,3 @@ def parse_indexing_order(data: Any) -> Literal["C", "F"]: return data msg = f"Expected one of ('C', 'F'), got {data} instead." raise ValueError(msg) - - -def parse_asyncio_loop(data: Any) -> AbstractEventLoop | None: - if data is None or isinstance(data, AbstractEventLoop): - return data - raise TypeError(f"Expected AbstractEventLoop or None, got {type(data)}") - - -@dataclass(frozen=True) -class RuntimeConfiguration: - order: Literal["C", "F"] = "C" - asyncio_loop: Optional[AbstractEventLoop] = None - - def __init__( - self, - order: Literal["C", "F"] = "C", - asyncio_loop: Optional[AbstractEventLoop] = None, - ): - order_parsed = parse_indexing_order(order) - asyncio_loop_parsed = parse_asyncio_loop(asyncio_loop) - - object.__setattr__(self, "order", order_parsed) - object.__setattr__(self, "asyncio_loop_parsed", asyncio_loop_parsed) diff --git a/src/zarr/group.py b/src/zarr/group.py index 3331c2ddfe..c71860b1b6 100644 --- a/src/zarr/group.py +++ b/src/zarr/group.py @@ -18,7 +18,6 @@ from zarr.array import AsyncArray, Array from zarr.attributes import Attributes from zarr.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON -from zarr.config import RuntimeConfiguration from zarr.store import StoreLike, StorePath, make_store_path from zarr.sync import SyncMixin, sync @@ -78,7 +77,6 @@ def to_dict(self) -> dict[str, Any]: class AsyncGroup: metadata: GroupMetadata store_path: StorePath - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration() @classmethod async def create( @@ -88,7 +86,6 @@ async def create( attributes: dict[str, Any] = {}, exists_ok: bool = False, zarr_format: Literal[2, 3] = 3, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> AsyncGroup: store_path = make_store_path(store) if not exists_ok: @@ -99,7 +96,6 @@ async def create( group = cls( metadata=GroupMetadata(attributes=attributes, zarr_format=zarr_format), store_path=store_path, - runtime_configuration=runtime_configuration, ) await group._save_metadata() return group @@ -108,7 +104,6 @@ async def create( async def open( cls, store: StoreLike, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), zarr_format: Literal[2, 3, None] = 3, ) -> AsyncGroup: store_path = make_store_path(store) @@ -154,19 +149,17 @@ async def open( assert zarr_json_bytes is not None group_metadata = json.loads(zarr_json_bytes) - return cls.from_dict(store_path, group_metadata, runtime_configuration) + return cls.from_dict(store_path, group_metadata) @classmethod def from_dict( cls, store_path: StorePath, data: dict[str, Any], - runtime_configuration: RuntimeConfiguration, ) -> AsyncGroup: group = cls( metadata=GroupMetadata.from_dict(data), store_path=store_path, - runtime_configuration=runtime_configuration, ) return group @@ -193,11 +186,9 @@ async def getitem( else: zarr_json = json.loads(zarr_json_bytes) if zarr_json["node_type"] == "group": - return type(self).from_dict(store_path, zarr_json, self.runtime_configuration) + return type(self).from_dict(store_path, zarr_json) elif zarr_json["node_type"] == "array": - return AsyncArray.from_dict( - store_path, zarr_json, runtime_configuration=self.runtime_configuration - ) + return AsyncArray.from_dict(store_path, zarr_json) else: raise ValueError(f"unexpected node_type: {zarr_json['node_type']}") elif self.metadata.zarr_format == 2: @@ -220,9 +211,7 @@ async def getitem( if zarray is not None: # TODO: update this once the V2 array support is part of the primary array class zarr_json = {**zarray, "attributes": zattrs} - return AsyncArray.from_dict( - store_path, zarray, runtime_configuration=self.runtime_configuration - ) + return AsyncArray.from_dict(store_path, zarray) else: zgroup = ( json.loads(zgroup_bytes) @@ -230,7 +219,7 @@ async def getitem( else {"zarr_format": self.metadata.zarr_format} ) zarr_json = {**zgroup, "attributes": zattrs} - return type(self).from_dict(store_path, zarr_json, self.runtime_configuration) + return type(self).from_dict(store_path, zarr_json) else: raise ValueError(f"unexpected zarr_format: {self.metadata.zarr_format}") @@ -260,18 +249,14 @@ def info(self): return self.metadata.info async def create_group(self, path: str, **kwargs) -> AsyncGroup: - runtime_configuration = kwargs.pop("runtime_configuration", self.runtime_configuration) return await type(self).create( self.store_path / path, - runtime_configuration=runtime_configuration, **kwargs, ) async def create_array(self, path: str, **kwargs) -> AsyncArray: - runtime_configuration = kwargs.pop("runtime_configuration", self.runtime_configuration) return await AsyncArray.create( self.store_path / path, - runtime_configuration=runtime_configuration, **kwargs, ) @@ -410,16 +395,13 @@ def create( *, attributes: dict[str, Any] = {}, exists_ok: bool = False, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> Group: obj = sync( AsyncGroup.create( store, attributes=attributes, exists_ok=exists_ok, - runtime_configuration=runtime_configuration, ), - loop=runtime_configuration.asyncio_loop, ) return cls(obj) @@ -428,11 +410,8 @@ def create( def open( cls, store: StoreLike, - runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> Group: - obj = sync( - AsyncGroup.open(store, runtime_configuration), loop=runtime_configuration.asyncio_loop - ) + obj = sync(AsyncGroup.open(store)) return cls(obj) def __getitem__(self, path: str) -> Array | Group: diff --git a/src/zarr/metadata.py b/src/zarr/metadata.py index 719d22a41c..3903bacd42 100644 --- a/src/zarr/metadata.py +++ b/src/zarr/metadata.py @@ -26,11 +26,7 @@ parse_fill_value, parse_shapelike, ) -from zarr.config import RuntimeConfiguration, parse_indexing_order - - -def runtime_configuration(order: Literal["C", "F"]) -> RuntimeConfiguration: - return RuntimeConfiguration(order=order) +from zarr.config import parse_indexing_order # For type checking @@ -144,7 +140,10 @@ def __init__( attributes_parsed = parse_attributes(attributes) array_spec = ArraySpec( - shape=shape_parsed, dtype=data_type_parsed, fill_value=fill_value_parsed + shape=shape_parsed, + dtype=data_type_parsed, + fill_value=fill_value_parsed, + order="C", # TODO: order is not needed here. ) codecs_parsed = parse_codecs(codecs).evolve(array_spec) @@ -182,7 +181,7 @@ def dtype(self) -> np.dtype[Any]: def ndim(self) -> int: return len(self.shape) - def get_chunk_spec(self, _chunk_coords: ChunkCoords) -> ArraySpec: + def get_chunk_spec(self, _chunk_coords: ChunkCoords, order: Literal["C", "F"]) -> ArraySpec: assert isinstance( self.chunk_grid, RegularChunkGrid ), "Currently, only regular chunk grid is supported" @@ -190,6 +189,7 @@ def get_chunk_spec(self, _chunk_coords: ChunkCoords) -> ArraySpec: shape=self.chunk_grid.chunk_shape, dtype=self.dtype, fill_value=self.fill_value, + order=order, ) def to_bytes(self) -> bytes: diff --git a/src/zarr/sync.py b/src/zarr/sync.py index 6c76064b9a..649db0be76 100644 --- a/src/zarr/sync.py +++ b/src/zarr/sync.py @@ -118,7 +118,6 @@ def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: # this should allow us to better type the sync wrapper return sync( coroutine, - loop=_get_loop(), timeout=config.get("async.timeout"), ) diff --git a/tests/v3/package_with_entrypoint/__init__.py b/tests/v3/package_with_entrypoint/__init__.py index cf7df20457..8b31733069 100644 --- a/tests/v3/package_with_entrypoint/__init__.py +++ b/tests/v3/package_with_entrypoint/__init__.py @@ -1,7 +1,6 @@ from numpy import ndarray from zarr.abc.codec import ArrayBytesCodec from zarr.common import ArraySpec, BytesLike -from zarr.config import RuntimeConfiguration class TestCodec(ArrayBytesCodec): @@ -11,7 +10,6 @@ async def encode( self, chunk_array: ndarray, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> BytesLike | None: pass @@ -19,7 +17,6 @@ async def decode( self, chunk_bytes: BytesLike, chunk_spec: ArraySpec, - runtime_configuration: RuntimeConfiguration, ) -> ndarray: pass diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index ffd225668b..cf0a25de17 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -21,9 +21,9 @@ TransposeCodec, ZstdCodec, ) -from zarr.metadata import runtime_configuration from zarr.abc.store import Store +from zarr.config import config from zarr.store import MemoryStore, StorePath @@ -255,25 +255,25 @@ async def test_order( else [TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()] ) - a = await AsyncArray.create( - store / "order", - shape=data.shape, - chunk_shape=(32, 8), - dtype=data.dtype, - fill_value=0, - chunk_key_encoding=("v2", "."), - codecs=codecs_, - runtime_configuration=runtime_configuration(runtime_write_order), - ) + with config.set({"order": runtime_write_order}): + a = await AsyncArray.create( + store / "order", + shape=data.shape, + chunk_shape=(32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=codecs_, + ) await _AsyncArrayProxy(a)[:, :].set(data) read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, read_data) - a = await AsyncArray.open( - store / "order", - runtime_configuration=runtime_configuration(order=runtime_read_order), - ) + with config.set({"order": runtime_read_order}): + a = await AsyncArray.open( + store / "order", + ) read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, read_data) @@ -313,22 +313,22 @@ def test_order_implicit( codecs_: Optional[List[Codec]] = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None - a = Array.create( - store / "order_implicit", - shape=data.shape, - chunk_shape=(16, 16), - dtype=data.dtype, - fill_value=0, - codecs=codecs_, - runtime_configuration=runtime_configuration(runtime_write_order), - ) + with config.set({"order": runtime_write_order}): + a = Array.create( + store / "order_implicit", + shape=data.shape, + chunk_shape=(16, 16), + dtype=data.dtype, + fill_value=0, + codecs=codecs_, + ) a[:, :] = data - a = Array.open( - store / "order_implicit", - runtime_configuration=runtime_configuration(order=runtime_read_order), - ) + with config.set({"order": runtime_read_order}): + a = Array.open( + store / "order_implicit", + ) read_data = a[:, :] assert np.array_equal(data, read_data) @@ -364,26 +364,25 @@ async def test_transpose( if with_sharding else [TransposeCodec(order=(2, 1, 0)), BytesCodec()] ) - - a = await AsyncArray.create( - store / "transpose", - shape=data.shape, - chunk_shape=(1, 32, 8), - dtype=data.dtype, - fill_value=0, - chunk_key_encoding=("v2", "."), - codecs=codecs_, - runtime_configuration=runtime_configuration(runtime_write_order), - ) + with config.set({"order": runtime_write_order}): + a = await AsyncArray.create( + store / "transpose", + shape=data.shape, + chunk_shape=(1, 32, 8), + dtype=data.dtype, + fill_value=0, + chunk_key_encoding=("v2", "."), + codecs=codecs_, + ) await _AsyncArrayProxy(a)[:, :].set(data) read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, read_data) - a = await AsyncArray.open( - store / "transpose", - runtime_configuration=runtime_configuration(runtime_read_order), - ) + with config.set({"order": runtime_read_order}): + a = await AsyncArray.open( + store / "transpose", + ) read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, read_data) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index cf5c147c39..11400ef809 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -10,7 +10,6 @@ from zarr.group import AsyncGroup, Group, GroupMetadata from zarr.store import LocalStore, StorePath -from zarr.config import RuntimeConfiguration # todo: put RemoteStore in here @@ -58,7 +57,6 @@ def test_group(store_type, request) -> None: agroup = AsyncGroup( metadata=GroupMetadata(), store_path=store_path, - runtime_configuration=RuntimeConfiguration(), ) group = Group(agroup) assert agroup.metadata is group.metadata @@ -99,7 +97,6 @@ def test_group_sync_constructor(store_path) -> None: group = Group.create( store=store_path, attributes={"title": "test 123"}, - runtime_configuration=RuntimeConfiguration(), ) assert group._async_group.metadata.attributes["title"] == "test 123" From 6f62a8c729d8a868e3499fabe1f14c51b8b6ad43 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 9 May 2024 15:37:56 -0700 Subject: [PATCH 5/8] fix order constructor --- src/zarr/array.py | 14 ++++++++------ src/zarr/config.py | 2 +- tests/v3/test_codecs.py | 12 ++++++------ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/zarr/array.py b/src/zarr/array.py index b84f7eca6a..1e014c1ca1 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -33,7 +33,7 @@ from zarr.indexing import BasicIndexer, all_chunk_coords, is_total_slice from zarr.chunk_grids import RegularChunkGrid from zarr.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding -from zarr.metadata import ArrayMetadata +from zarr.metadata import ArrayMetadata, parse_indexing_order from zarr.store import StoreLike, StorePath, make_store_path from zarr.sync import sync @@ -51,7 +51,7 @@ def parse_array_metadata(data: Any) -> ArrayMetadata: class AsyncArray: metadata: ArrayMetadata store_path: StorePath - _order: Literal["C", "F"] + order: Literal["C", "F"] @property def codecs(self): @@ -61,12 +61,14 @@ def __init__( self, metadata: ArrayMetadata, store_path: StorePath, + order: Literal["C", "F"] | None = None, ): metadata_parsed = parse_array_metadata(metadata) + order_parsed = parse_indexing_order(order or config.get("array.order")) object.__setattr__(self, "metadata", metadata_parsed) object.__setattr__(self, "store_path", store_path) - object.__setattr__(self, "_order", config.get("order", "C")) + object.__setattr__(self, "order", order_parsed) @classmethod async def create( @@ -192,7 +194,7 @@ async def getitem(self, selection: Selection) -> np.ndarray: out = np.zeros( indexer.shape, dtype=self.metadata.dtype, - order=self._order, + order=self.order, ) # reading chunks and decoding them @@ -220,7 +222,7 @@ async def _read_chunk( out_selection: SliceSelection, out: np.ndarray, ) -> None: - chunk_spec = self.metadata.get_chunk_spec(chunk_coords, self._order) + chunk_spec = self.metadata.get_chunk_spec(chunk_coords, self.order) chunk_key_encoding = self.metadata.chunk_key_encoding chunk_key = chunk_key_encoding.encode_chunk_key(chunk_coords) store_path = self.store_path / chunk_key @@ -286,7 +288,7 @@ async def _write_chunk( chunk_selection: SliceSelection, out_selection: SliceSelection, ) -> None: - chunk_spec = self.metadata.get_chunk_spec(chunk_coords, self._order) + chunk_spec = self.metadata.get_chunk_spec(chunk_coords, self.order) chunk_key_encoding = self.metadata.chunk_key_encoding chunk_key = chunk_key_encoding.encode_chunk_key(chunk_coords) store_path = self.store_path / chunk_key diff --git a/src/zarr/config.py b/src/zarr/config.py index 5a42676e05..e546cb1c23 100644 --- a/src/zarr/config.py +++ b/src/zarr/config.py @@ -6,7 +6,7 @@ config = Config( "zarr", - defaults=[{"async": {"concurrency": None, "timeout": None}}], + defaults=[{"array": {"order": "C"}, "async": {"concurrency": None, "timeout": None}}], ) diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py index cf0a25de17..e042c7f275 100644 --- a/tests/v3/test_codecs.py +++ b/tests/v3/test_codecs.py @@ -255,7 +255,7 @@ async def test_order( else [TransposeCodec(order=order_from_dim(store_order, data.ndim)), BytesCodec()] ) - with config.set({"order": runtime_write_order}): + with config.set({"array.order": runtime_write_order}): a = await AsyncArray.create( store / "order", shape=data.shape, @@ -270,7 +270,7 @@ async def test_order( read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, read_data) - with config.set({"order": runtime_read_order}): + with config.set({"array.order": runtime_read_order}): a = await AsyncArray.open( store / "order", ) @@ -313,7 +313,7 @@ def test_order_implicit( codecs_: Optional[List[Codec]] = [ShardingCodec(chunk_shape=(8, 8))] if with_sharding else None - with config.set({"order": runtime_write_order}): + with config.set({"array.order": runtime_write_order}): a = Array.create( store / "order_implicit", shape=data.shape, @@ -325,7 +325,7 @@ def test_order_implicit( a[:, :] = data - with config.set({"order": runtime_read_order}): + with config.set({"array.order": runtime_read_order}): a = Array.open( store / "order_implicit", ) @@ -364,7 +364,7 @@ async def test_transpose( if with_sharding else [TransposeCodec(order=(2, 1, 0)), BytesCodec()] ) - with config.set({"order": runtime_write_order}): + with config.set({"array.order": runtime_write_order}): a = await AsyncArray.create( store / "transpose", shape=data.shape, @@ -379,7 +379,7 @@ async def test_transpose( read_data = await _AsyncArrayProxy(a)[:, :].get() assert np.array_equal(data, read_data) - with config.set({"order": runtime_read_order}): + with config.set({"array.order": runtime_read_order}): a = await AsyncArray.open( store / "transpose", ) From 7b16d11bef6771f91262e60c0d7d68afe8d9182a Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 10 May 2024 14:14:16 -0700 Subject: [PATCH 6/8] add basic tests for config state --- tests/v3/test_config.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 tests/v3/test_config.py diff --git a/tests/v3/test_config.py b/tests/v3/test_config.py new file mode 100644 index 0000000000..43acdec5fa --- /dev/null +++ b/tests/v3/test_config.py @@ -0,0 +1,15 @@ +from zarr.config import config + + +def test_config_defaults_set(): + # regression test for available defaults + assert config.defaults == [ + {"array": {"order": "C"}, "async": {"concurrency": None, "timeout": None}} + ] + assert config.get("array.order") == "C" + + +def test_config_defaults_can_be_overridden(): + assert config.get("array.order") == "C" + with config.set({"array.order": "F"}): + assert config.get("array.order") == "F" From e3d6649171f404ac35d434be8d91761c0254ff6b Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 10 May 2024 14:18:04 -0700 Subject: [PATCH 7/8] add order property to Array --- src/zarr/array.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/zarr/array.py b/src/zarr/array.py index 1e014c1ca1..9f4ec911b4 100644 --- a/src/zarr/array.py +++ b/src/zarr/array.py @@ -485,6 +485,10 @@ def metadata(self) -> ArrayMetadata: def store_path(self) -> StorePath: return self._async_array.store_path + @property + def order(self) -> Literal["C", "F"]: + return self._async_array.order + def __getitem__(self, selection: Selection) -> np.ndarray: return sync( self._async_array.getitem(selection), From 52e8f5f1eb8444d266dcef193f282f485bd03a97 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 10 May 2024 14:20:07 -0700 Subject: [PATCH 8/8] update comment in sharding codec --- src/zarr/codecs/sharding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 29da3ac9cf..e94074e63e 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -621,7 +621,7 @@ def _get_index_chunk_spec(self, chunks_per_shard: ChunkCoords) -> ArraySpec: shape=chunks_per_shard + (2,), dtype=np.dtype("