From f5e3f789d85507f1bf7e8b4078f07a92fc315051 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 21 Feb 2025 13:43:34 +0100 Subject: [PATCH 001/130] modernize typing --- src/zarr/core/strings.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/zarr/core/strings.py b/src/zarr/core/strings.py index ffca0c3b0c..5bc7ceece5 100644 --- a/src/zarr/core/strings.py +++ b/src/zarr/core/strings.py @@ -2,7 +2,9 @@ different versions of Numpy. """ -from typing import Any, Union, cast +from __future__ import annotations + +from typing import Any, cast from warnings import warn import numpy as np @@ -11,13 +13,13 @@ # when reading data back from Zarr. # Any valid string-like datatype should be fine for *setting* data. -_STRING_DTYPE: Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"] +_STRING_DTYPE: np.dtypes.StringDType | np.dtypes.ObjectDType _NUMPY_SUPPORTS_VLEN_STRING: bool def cast_array( data: np.ndarray[Any, np.dtype[Any]], -) -> np.ndarray[Any, Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"]]: +) -> np.ndarray[Any, np.dtypes.StringDType | np.dtypes.ObjectDType]: raise NotImplementedError @@ -39,14 +41,14 @@ def cast_array( def cast_array( data: np.ndarray[Any, np.dtype[Any]], - ) -> np.ndarray[Any, Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"]]: + ) -> np.ndarray[Any, np.dtypes.StringDType | np.dtypes.ObjectDType]: out = data.astype(_STRING_DTYPE, copy=False) return cast(np.ndarray[Any, np.dtypes.ObjectDType], out) def cast_to_string_dtype( data: np.ndarray[Any, np.dtype[Any]], safe: bool = False -) -> np.ndarray[Any, Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"]]: +) -> np.ndarray[Any, np.dtypes.StringDType | np.dtypes.ObjectDType]: """Take any data and attempt to cast to to our preferred string dtype. 
data : np.ndarray From 3c50f54942e92ae0f13d960a4e476e63ed31aa54 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 24 Feb 2025 14:41:25 +0100 Subject: [PATCH 002/130] lint --- src/zarr/core/common.py | 4 ++-- src/zarr/core/metadata/v3.py | 2 +- src/zarr/core/strings.py | 21 +++++++++++---------- tests/test_strings.py | 20 +++++++++++--------- 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index ad3316b619..e398eff406 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -19,7 +19,7 @@ import numpy as np from zarr.core.config import config as zarr_config -from zarr.core.strings import _STRING_DTYPE +from zarr.core.strings import _VLEN_STRING_DTYPE if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterator @@ -173,7 +173,7 @@ def parse_dtype(dtype: Any, zarr_format: ZarrFormat) -> np.dtype[Any]: # special case as object return np.dtype("object") else: - return _STRING_DTYPE + return _VLEN_STRING_DTYPE return np.dtype(dtype) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 9154762648..649e79b7ae 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -38,7 +38,7 @@ from zarr.core.config import config from zarr.core.metadata.common import parse_attributes from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING -from zarr.core.strings import _STRING_DTYPE as STRING_NP_DTYPE +from zarr.core.strings import _VLEN_STRING_DTYPE as STRING_NP_DTYPE from zarr.errors import MetadataValidationError, NodeTypeValidationError from zarr.registry import get_codec_class diff --git a/src/zarr/core/strings.py b/src/zarr/core/strings.py index 5bc7ceece5..f14b38840d 100644 --- a/src/zarr/core/strings.py +++ b/src/zarr/core/strings.py @@ -13,42 +13,43 @@ # when reading data back from Zarr. # Any valid string-like datatype should be fine for *setting* data. 
-_STRING_DTYPE: np.dtypes.StringDType | np.dtypes.ObjectDType +VLenStringType = np.dtypes.StringDType | np.dtypes.ObjectDType +_VLEN_STRING_DTYPE: VLenStringType _NUMPY_SUPPORTS_VLEN_STRING: bool def cast_array( data: np.ndarray[Any, np.dtype[Any]], -) -> np.ndarray[Any, np.dtypes.StringDType | np.dtypes.ObjectDType]: +) -> np.ndarray[Any, VLenStringType]: raise NotImplementedError try: # this new vlen string dtype was added in NumPy 2.0 - _STRING_DTYPE = np.dtypes.StringDType() + _VLEN_STRING_DTYPE = np.dtypes.StringDType() _NUMPY_SUPPORTS_VLEN_STRING = True def cast_array( data: np.ndarray[Any, np.dtype[Any]], - ) -> np.ndarray[Any, np.dtypes.StringDType | np.dtypes.ObjectDType]: - out = data.astype(_STRING_DTYPE, copy=False) + ) -> np.ndarray[Any, VLenStringType]: + out = data.astype(_VLEN_STRING_DTYPE, copy=False) return cast(np.ndarray[Any, np.dtypes.StringDType], out) except AttributeError: # if not available, we fall back on an object array of strings, as in Zarr < 3 - _STRING_DTYPE = np.dtypes.ObjectDType() + _VLEN_STRING_DTYPE = np.dtypes.ObjectDType() _NUMPY_SUPPORTS_VLEN_STRING = False def cast_array( data: np.ndarray[Any, np.dtype[Any]], - ) -> np.ndarray[Any, np.dtypes.StringDType | np.dtypes.ObjectDType]: - out = data.astype(_STRING_DTYPE, copy=False) + ) -> np.ndarray[Any, VLenStringType]: + out = data.astype(_VLEN_STRING_DTYPE, copy=False) return cast(np.ndarray[Any, np.dtypes.ObjectDType], out) def cast_to_string_dtype( data: np.ndarray[Any, np.dtype[Any]], safe: bool = False -) -> np.ndarray[Any, np.dtypes.StringDType | np.dtypes.ObjectDType]: +) -> np.ndarray[Any, VLenStringType]: """Take any data and attempt to cast to to our preferred string dtype. 
data : np.ndarray @@ -63,7 +64,7 @@ def cast_to_string_dtype( return cast_array(data) # out = data.astype(STRING_DTYPE, copy=False) # return cast(np.ndarray[Any, np.dtypes.StringDType | np.dtypes.ObjectDType], out) - if _NUMPY_SUPPORTS_VLEN_STRING and np.issubdtype(data.dtype, _STRING_DTYPE): + if _NUMPY_SUPPORTS_VLEN_STRING and np.issubdtype(data.dtype, _VLEN_STRING_DTYPE): # already a valid string variable length string dtype return cast_array(data) if np.issubdtype(data.dtype, np.object_): diff --git a/tests/test_strings.py b/tests/test_strings.py index dca0570a25..963f2e305e 100644 --- a/tests/test_strings.py +++ b/tests/test_strings.py @@ -3,33 +3,35 @@ import numpy as np import pytest -from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING, _STRING_DTYPE, cast_to_string_dtype +from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING, _VLEN_STRING_DTYPE, cast_to_string_dtype def test_string_defaults() -> None: if _NUMPY_SUPPORTS_VLEN_STRING: - assert _STRING_DTYPE == np.dtypes.StringDType() + assert _VLEN_STRING_DTYPE == np.dtypes.StringDType() else: - assert _STRING_DTYPE == np.dtypes.ObjectDType() + assert _VLEN_STRING_DTYPE == np.dtypes.ObjectDType() def test_cast_to_string_dtype() -> None: d1 = np.array(["a", "b", "c"]) assert d1.dtype == np.dtype(" Date: Wed, 26 Feb 2025 09:35:37 +0100 Subject: [PATCH 003/130] new dtypes --- src/zarr/core/_info.py | 6 +- src/zarr/core/array.py | 20 +- src/zarr/core/dtype/__init__.py | 3 + src/zarr/core/dtype/core.py | 196 +++++++++++++++++ src/zarr/core/metadata/dtype.py | 372 ++++++++++++++++++++++++++++++++ src/zarr/core/metadata/v3.py | 5 +- src/zarr/core/strings.py | 4 +- src/zarr/registry.py | 106 ++++++++- tests/test_array.py | 2 +- tests/test_codecs/test_vlen.py | 19 +- tests/test_metadata/test_v3.py | 1 - 11 files changed, 703 insertions(+), 31 deletions(-) create mode 100644 src/zarr/core/dtype/__init__.py create mode 100644 src/zarr/core/dtype/core.py create mode 100644 src/zarr/core/metadata/dtype.py diff 
--git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index 845552c8be..14eb98d6e4 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -7,7 +7,9 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.core.common import ZarrFormat -from zarr.core.metadata.v3 import DataType +from zarr.core.metadata.dtype import BaseDataType + +# from zarr.core.metadata.v3 import DataType @dataclasses.dataclass(kw_only=True) @@ -78,7 +80,7 @@ class ArrayInfo: _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat - _data_type: np.dtype[Any] | DataType + _data_type: np.dtype[Any] | BaseDataType _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] | None = None diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 9c2f8a7260..2bb809037d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -98,19 +98,21 @@ ArrayV3MetadataDict, T_ArrayMetadata, ) +from zarr.core.metadata.dtype import BaseDataType from zarr.core.metadata.v2 import ( _default_compressor, _default_filters, parse_compressor, parse_filters, ) -from zarr.core.metadata.v3 import DataType, parse_node_type_array +from zarr.core.metadata.v3 import parse_node_type_array from zarr.core.sync import sync from zarr.errors import MetadataValidationError from zarr.registry import ( _parse_array_array_codec, _parse_array_bytes_codec, _parse_bytes_bytes_codec, + get_data_type_from_numpy, get_pipeline_class, ) from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path @@ -1682,7 +1684,7 @@ async def info_complete(self) -> Any: def _info( self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None ) -> Any: - _data_type: np.dtype[Any] | DataType + _data_type: np.dtype[Any] | BaseDataType if isinstance(self.metadata, ArrayV2Metadata): _data_type = self.metadata.dtype else: @@ -4203,17 +4205,11 @@ def _get_default_chunk_encoding_v3( """ Get the default 
ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype. """ - dtype = DataType.from_numpy(np_dtype) - if dtype == DataType.string: - dtype_key = "string" - elif dtype == DataType.bytes: - dtype_key = "bytes" - else: - dtype_key = "numeric" + dtype = get_data_type_from_numpy(np_dtype) - default_filters = zarr_config.get("array.v3_default_filters").get(dtype_key) - default_serializer = zarr_config.get("array.v3_default_serializer").get(dtype_key) - default_compressors = zarr_config.get("array.v3_default_compressors").get(dtype_key) + default_filters = zarr_config.get("array.v3_default_filters").get(dtype.type) + default_serializer = zarr_config.get("array.v3_default_serializer").get(dtype.type) + default_compressors = zarr_config.get("array.v3_default_compressors").get(dtype.type) filters = tuple(_parse_array_array_codec(codec_dict) for codec_dict in default_filters) serializer = _parse_array_bytes_codec(default_serializer) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py new file mode 100644 index 0000000000..58b884ff23 --- /dev/null +++ b/src/zarr/core/dtype/__init__.py @@ -0,0 +1,3 @@ +from zarr.core.dtype.core import ZarrDType + +__all__ = ["ZarrDType"] diff --git a/src/zarr/core/dtype/core.py b/src/zarr/core/dtype/core.py new file mode 100644 index 0000000000..c6460706aa --- /dev/null +++ b/src/zarr/core/dtype/core.py @@ -0,0 +1,196 @@ +""" +# Overview + +This module provides a proof-of-concept standalone interface for managing dtypes in the zarr-python codebase. + +The `ZarrDType` class introduced in this module effectively acts as a replacement for `np.dtype` throughout the +zarr-python codebase. It attempts to encapsulate all relevant runtime information necessary for working with +dtypes in the context of the Zarr V3 specification (e.g. is this a core dtype or not, how many bytes and what +endianness is the dtype etc). 
By providing this abstraction, the module aims to: + +- Simplify dtype management within zarr-python +- Support runtime flexibility and custom extensions +- Remove unnecessary dependencies on the numpy API + +## Extensibility + +The module attempts to support user-driven extensions, allowing developers to introduce custom dtypes +without requiring immediate changes to zarr-python. Extensions can leverage the current entrypoint mechanism, +enabling integration of experimental features. Over time, widely adopted extensions may be formalized through +inclusion in zarr-python or standardized via a Zarr Enhancement Proposal (ZEP), but this is not essential. + +## Examples + +### Core `dtype` Registration + +The following example demonstrates how to register a built-in `dtype` in the core codebase: + +```python +from zarr.core.dtype import ZarrDType +from zarr.registry import register_v3dtype + +class Float16(ZarrDType): + zarr_spec_format = "3" + experimental = False + endianness = "little" + byte_count = 2 + to_numpy = np.dtype('float16') + +register_v3dtype(Float16) +``` + +### Entrypoint Extension + +The following example demonstrates how users can register a new `bfloat16` dtype for Zarr. +This approach adheres to the existing Zarr entrypoint pattern as much as possible, ensuring +consistency with other extensions. 
The code below would typically be part of a Python package +that specifies the entrypoints for the extension: + +```python +import ml_dtypes +from zarr.core.dtype import ZarrDType # User inherits from ZarrDType when creating their dtype + +class Bfloat16(ZarrDType): + zarr_spec_format = "3" + experimental = True + endianness = "little" + byte_count = 2 + to_numpy = np.dtype('bfloat16') # Enabled by importing ml_dtypes + configuration_v3 = { + "version": "example_value", + "author": "example_value", + "ml_dtypes_version": "example_value" + } +``` + +### dtype lookup + +The following examples demonstrate how to perform a lookup for the relevant ZarrDType, given +a string that matches the dtype Zarr specification ID, or a numpy dtype object: + +``` +from zarr.registry import get_v3dtype_class, get_v3dtype_class_from_numpy + +get_v3dtype_class('complex64') # returns little-endian Complex64 ZarrDType +get_v3dtype_class('not_registered_dtype') # ValueError + +get_v3dtype_class_from_numpy('>i2') # returns big-endian Int16 ZarrDType +get_v3dtype_class_from_numpy(np.dtype('float32')) # returns little-endian Float32 ZarrDType +get_v3dtype_class_from_numpy('i10') # ValueError +``` + +### String dtypes + +The following indicates one possibility for supporting variable-length strings. It is via the +entrypoint mechanism as in a previous example. 
The Apache Arrow specification does not currently +include a dtype for fixed-length strings (only for fixed-length bytes) and so I am using string +here to implicitly refer to a variable-length string data (there may be some subtleties with codecs +that means this needs to be refined further): + +```python +import numpy as np +from zarr.core.dtype import ZarrDType # User inherits from ZarrDType when creating their dtype + +try: + to_numpy = np.dtypes.StringDType() +except AttributeError: + to_numpy = np.dtypes.ObjectDType() + +class String(ZarrDType): + zarr_spec_format = "3" + experimental = True + endianness = 'little' + byte_count = None # None is defined to mean variable + to_numpy = to_numpy +``` + +### int4 dtype + +There is currently considerable interest in the AI community in 'quantising' models - storing +models at reduced precision, while minimising loss of information content. There are a number +of sub-byte dtypes that the community are using e.g. int4. Unfortunately numpy does not +currently have support for handling such sub-byte dtypes in an easy way. 
However, they can +still be held in a numpy array and then passed (in a zero-copy way) to something like pytorch +which can handle appropriately: + +```python +import numpy as np +from zarr.core.dtype import ZarrDType # User inherits from ZarrDType when creating their dtype + +class Int4(ZarrDType): + zarr_spec_format = "3" + experimental = True + endianness = 'little' + byte_count = 1 # this is ugly, but I could change this from byte_count to bit_count if there was consensus + to_numpy = np.dtype('B') # could also be np.dtype('V1'), but this would prevent bit-twiddling + configuration_v3 = { + "version": "example_value", + "author": "example_value", + } +``` +""" + +from __future__ import annotations + +from typing import Any, Literal + +import numpy as np + + +class FrozenClassVariables(type): + def __setattr__(cls, attr: str, value: object) -> None: + if hasattr(cls, attr): + raise ValueError(f"Attribute {attr} on ZarrDType class can not be changed once set.") + else: + raise AttributeError(f"'{cls}' object has no attribute '{attr}'") + + +class ZarrDType(metaclass=FrozenClassVariables): + zarr_spec_format: Literal["2", "3"] # the version of the zarr spec used + experimental: bool # is this in the core spec or not + endianness: Literal[ + "big", "little", None + ] # None indicates not defined i.e. single byte or byte strings + byte_count: int | None # None indicates variable count + to_numpy: np.dtype[Any] # may involve installing a a numpy extension e.g. 
ml_dtypes; + + configuration_v3: dict | None # TODO: understand better how this is recommended by the spec + + _zarr_spec_identifier: str # implementation detail used to map to core spec + + def __init_subclass__( # enforces all required fields are set and basic sanity checks + cls, + **kwargs, + ) -> None: + required_attrs = [ + "zarr_spec_format", + "experimental", + "endianness", + "byte_count", + "to_numpy", + ] + for attr in required_attrs: + if not hasattr(cls, attr): + raise ValueError(f"{attr} is a required attribute for a Zarr dtype.") + + if not hasattr(cls, "configuration_v3"): + cls.configuration_v3 = None + + cls._zarr_spec_identifier = ( + "big_" + cls.__qualname__.lower() + if cls.endianness == "big" + else cls.__qualname__.lower() + ) # how this dtype is identified in core spec; convention is prefix with big_ for big-endian + + cls._validate() # sanity check on basic requirements + + super().__init_subclass__(**kwargs) + + # TODO: add further checks + @classmethod + def _validate(cls): + if cls.byte_count is not None and cls.byte_count <= 0: + raise ValueError("byte_count must be a positive integer.") + + if cls.byte_count == 1 and cls.endianness is not None: + raise ValueError("Endianness must be None for single-byte types.") diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py new file mode 100644 index 0000000000..ab101f2fad --- /dev/null +++ b/src/zarr/core/metadata/dtype.py @@ -0,0 +1,372 @@ +from abc import ABC +from dataclasses import dataclass, field +from typing import Any, ClassVar, Literal, Self, get_args + +import numpy as np + +from zarr.abc.metadata import Metadata +from zarr.core.common import JSON +from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING +from zarr.registry import register_data_type + +Endianness = Literal["little", "big", "native"] +DataTypeFlavor = Literal["boolean", "numeric", "string", "bytes"] + + +def endianness_to_numpy_str(endianness: Endianness | None) -> Literal[">", "<", "=", 
"|"]: + match endianness: + case "little": + return "<" + case "big": + return ">" + case "native": + return "=" + case None: + return "|" + raise ValueError( + f"Invalid endianness: {endianness}. Expected one of {get_args(endianness)} or None" + ) + + +class BaseDataType(ABC, Metadata): + name: ClassVar[str] + numpy_character_code: ClassVar[str] + item_size: ClassVar[int | None] + type: ClassVar[DataTypeFlavor] + capacity: int + + def __init_subclass__(cls, **kwargs: object) -> None: + required_attrs = [ + "name", + "numpy_character_code", + "item_size", + "type", + ] + for attr in required_attrs: + if not hasattr(cls, attr): + raise ValueError(f"{attr} is a required attribute for a Zarr dtype.") + + return super().__init_subclass__(**kwargs) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.name} + + def to_numpy(self: Self, *, endianness: Endianness | None = None) -> np.dtype[Any]: + endian_str = endianness_to_numpy_str(endianness) + return np.dtype(endian_str + self.numpy_character_code) + + +@dataclass(frozen=True, kw_only=True) +class Bool(BaseDataType): + name = "bool" + item_size = 1 + type = "boolean" + numpy_character_code = "?" 
+ capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.BoolDType: + return super().to_numpy(endianness=endianness) + + +register_data_type(Bool) + + +@dataclass(frozen=True, kw_only=True) +class Int8(BaseDataType): + name = "int8" + item_size = 1 + type = "numeric" + numpy_character_code = "b" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int8DType: + return super().to_numpy(endianness=endianness) + + +register_data_type(Int8) + + +@dataclass(frozen=True, kw_only=True) +class UInt8(BaseDataType): + name = "uint8" + item_size = 2 + type = "numeric" + numpy_character_code = "B" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt8DType: + return super().to_numpy(endianness=endianness) + + +register_data_type(UInt8) + + +@dataclass(frozen=True, kw_only=True) +class Int16(BaseDataType): + name = "int16" + item_size = 2 + type = "numeric" + numpy_character_code = "h" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int16DType: + return super().to_numpy(endianness=endianness) + + +register_data_type(Int16) + + +@dataclass(frozen=True, kw_only=True) +class UInt16(BaseDataType): + name = "uint16" + item_size = 2 + type = "numeric" + numpy_character_code = "H" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt16DType: + return super().to_numpy(endianness=endianness) + + +register_data_type(UInt16) + + +@dataclass(frozen=True, kw_only=True) +class Int32(BaseDataType): + name = "int32" + item_size = 4 + type = "numeric" + numpy_character_code = "i" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int32DType: + return 
super().to_numpy(endianness=endianness) + + +register_data_type(Int32) + + +@dataclass(frozen=True, kw_only=True) +class UInt32(BaseDataType): + name = "uint32" + item_size = 4 + type = "numeric" + numpy_character_code = "I" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt32DType: + return super().to_numpy(endianness=endianness) + + +register_data_type(UInt32) + + +@dataclass(frozen=True, kw_only=True) +class Int64(BaseDataType): + name = "int64" + item_size = 8 + type = "numeric" + numpy_character_code = "l" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int64DType: + return super().to_numpy(endianness=endianness) + + +register_data_type(Int64) + + +@dataclass(frozen=True, kw_only=True) +class UInt64(BaseDataType): + name = "uint64" + item_size = 8 + type = "numeric" + numpy_character_code = "L" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt64DType: + return super().to_numpy(endianness=endianness) + + +register_data_type(UInt64) + + +@dataclass(frozen=True, kw_only=True) +class Float16(BaseDataType): + name = "float16" + item_size = 2 + type = "numeric" + numpy_character_code = "e" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float16DType: + return super().to_numpy(endianness=endianness) + + +register_data_type(Float16) + + +@dataclass(frozen=True, kw_only=True) +class Float32(BaseDataType): + name = "float32" + item_size = 4 + type = "numeric" + numpy_character_code = "f" + capacity: int = field(default=1, init=False) + + def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float32DType: + return super().to_numpy(endianness=endianness) + + +register_data_type(Float32) + + +@dataclass(frozen=True, kw_only=True) +class 
@dataclass(frozen=True, kw_only=True)
class Float64(BaseDataType):
    name = "float64"
    item_size = 8
    type = "numeric"
    numpy_character_code = "d"
    capacity: int = field(default=1, init=False)

    def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float64DType:
        return super().to_numpy(endianness=endianness)


register_data_type(Float64)


@dataclass(frozen=True, kw_only=True)
class Complex64(BaseDataType):
    name = "complex64"
    item_size = 16
    type = "numeric"
    numpy_character_code = "F"
    capacity: int = field(default=1, init=False)

    def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex64DType:
        return super().to_numpy(endianness=endianness)


register_data_type(Complex64)


@dataclass(frozen=True, kw_only=True)
class Complex128(BaseDataType):
    # Fix: was "complex64" (copy-paste from Complex64), which would make this class
    # shadow / collide with Complex64 in the name-keyed data type registry.
    name = "complex128"
    item_size = 32
    type = "numeric"
    numpy_character_code = "D"
    capacity: int = field(default=1, init=False)

    def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex128DType:
        return super().to_numpy(endianness=endianness)


register_data_type(Complex128)


@dataclass(frozen=True, kw_only=True)
class StaticByteString(BaseDataType):
    """Fixed-capacity byte string (numpy 'S' kind); capacity is bytes per element."""

    name = "numpy/static_byte_string"
    type = "string"
    numpy_character_code = "S"
    item_size = 1
    capacity: int

    def to_dict(self) -> dict[str, JSON]:
        return {"name": self.name, "configuration": {"capacity": self.capacity}}

    def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.bytes_]:
        endianness_code = endianness_to_numpy_str(endianness)
        return np.dtype(endianness_code + self.numpy_character_code + str(self.capacity))


register_data_type(StaticByteString)

if _NUMPY_SUPPORTS_VLEN_STRING:
    # NumPy >= 2.0: use the native variable-length string dtype ("T").

    @dataclass(frozen=True, kw_only=True)
    class VlenString(BaseDataType):
        name = "numpy/vlen_string"
        type = "string"
        numpy_character_code = "T"
        item_size = None
        capacity: int

        def to_dict(self) -> dict[str, JSON]:
            return {"name": self.name, "configuration": {"capacity": self.capacity}}

        def to_numpy(
            self, endianness: Endianness | None = "native"
        ) -> np.dtype[np.dtypes.StringDType]:
            endianness_code = endianness_to_numpy_str(endianness)
            return np.dtype(endianness_code + self.numpy_character_code)

else:
    # NumPy < 2.0: fall back on an object array of strings, as in Zarr < 3.

    @dataclass(frozen=True, kw_only=True)
    class VlenString(BaseDataType):
        name = "numpy/vlen_string"
        type = "string"
        numpy_character_code = "O"
        item_size = None
        capacity: int

        def to_dict(self) -> dict[str, JSON]:
            return {"name": self.name, "configuration": {"capacity": self.capacity}}

        def to_numpy(
            self, endianness: Endianness | None = "native"
        ) -> np.dtype[np.dtypes.ObjectDType]:
            endianness_code = endianness_to_numpy_str(endianness)
            return np.dtype(endianness_code + self.numpy_character_code)


register_data_type(VlenString)


@dataclass(frozen=True, kw_only=True)
class StaticUnicodeString(BaseDataType):
    """Fixed-capacity unicode string (numpy 'U' kind); capacity is code points per element."""

    name = "numpy/static_unicode_string"
    type = "string"
    numpy_character_code = "U"
    item_size = 4
    capacity: int

    def to_dict(self) -> dict[str, JSON]:
        return {"name": self.name, "configuration": {"capacity": self.capacity}}

    def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.str_]:
        endianness_code = endianness_to_numpy_str(endianness)
        return np.dtype(endianness_code + self.numpy_character_code + str(self.capacity))


register_data_type(StaticUnicodeString)


@dataclass(frozen=True, kw_only=True)
class StaticRawBytes(BaseDataType):
    """Fixed-size raw bytes (numpy void); spec name is r<nbits>, e.g. r16 for 2 bytes."""

    name = "r*"
    type = "bytes"
    numpy_character_code = "V"
    item_size = 1
    capacity: int

    def to_dict(self) -> dict[str, JSON]:
        return {"name": f"r{self.capacity * 8}"}

    def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.void]:
        endianness_code = endianness_to_numpy_str(endianness)
        return np.dtype(endianness_code + self.numpy_character_code + str(self.capacity))


# Moved next to the class it registers, consistent with every other dtype above.
register_data_type(StaticRawBytes)


# Fix: the annotation was ``npt.DtypeLike`` -- ``npt`` is never imported in this
# module and the module has no ``from __future__ import annotations``, so the
# annotation raised NameError at definition time (and the correct numpy name is
# ``DTypeLike``). A string annotation keeps it lazy.
def parse_dtype(dtype: "npt.DTypeLike | BaseDataType") -> BaseDataType:
    """Pass through *dtype* if it is already a BaseDataType, else look it up by numpy dtype.

    NOTE(review): get_data_type_from_numpy returns the dtype *class*, not an
    instance, so the declared return type looks inconsistent -- confirm intent.
    """
    # Deferred import to avoid a circular dependency with zarr.registry.
    from zarr.registry import get_data_type_from_numpy

    if isinstance(dtype, BaseDataType):
        return dtype
    return get_data_type_from_numpy(dtype)
zarr.registry import get_data_type_from_numpy + + if isinstance(dtype, BaseDataType): + return dtype + return get_data_type_from_numpy(dtype) + + +register_data_type(StaticRawBytes) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 649e79b7ae..86503e64cd 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -5,6 +5,7 @@ from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype +from zarr.core.metadata.dtype import BaseDataType if TYPE_CHECKING: from collections.abc import Callable @@ -251,7 +252,7 @@ def __init__( self, *, shape: Iterable[int], - data_type: npt.DTypeLike | DataType, + data_type: npt.DTypeLike | BaseDataType, chunk_grid: dict[str, JSON] | ChunkGrid, chunk_key_encoding: ChunkKeyEncodingLike, fill_value: Any, @@ -595,7 +596,7 @@ def default_fill_value(dtype: DataType) -> str | bytes | np.generic: _bool = bool -class DataType(Enum): +class DataTypex(Enum): bool = "bool" int8 = "int8" int16 = "int16" diff --git a/src/zarr/core/strings.py b/src/zarr/core/strings.py index f14b38840d..15c30b6f9b 100644 --- a/src/zarr/core/strings.py +++ b/src/zarr/core/strings.py @@ -4,7 +4,7 @@ from __future__ import annotations -from typing import Any, cast +from typing import Any, Union, cast from warnings import warn import numpy as np @@ -13,7 +13,7 @@ # when reading data back from Zarr. # Any valid string-like datatype should be fine for *setting* data. 
-VLenStringType = np.dtypes.StringDType | np.dtypes.ObjectDType +VLenStringType = Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"] _VLEN_STRING_DTYPE: VLenStringType _NUMPY_SUPPORTS_VLEN_STRING: bool diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 704db3f704..480d75d49a 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -5,11 +5,15 @@ from importlib.metadata import entry_points as get_entry_points from typing import TYPE_CHECKING, Any, Generic, TypeVar +import numpy as np + from zarr.core.config import BadConfigError, config if TYPE_CHECKING: from importlib.metadata import EntryPoint + import numpy.typing as npt + from zarr.abc.codec import ( ArrayArrayCodec, ArrayBytesCodec, @@ -19,6 +23,8 @@ ) from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON + from zarr.core.dtype import ZarrDType + from zarr.core.metadata.dtype import BaseDataType __all__ = [ "Registry", @@ -26,10 +32,14 @@ "get_codec_class", "get_ndbuffer_class", "get_pipeline_class", + "get_v2dtype_class", + "get_v3dtype_class", "register_buffer", "register_codec", "register_ndbuffer", "register_pipeline", + "register_v2dtype", + "register_v3dtype", ] T = TypeVar("T") @@ -43,6 +53,7 @@ def __init__(self) -> None: def lazy_load(self) -> None: for e in self.lazy_load_list: self.register(e.load()) + self.lazy_load_list.clear() def register(self, cls: type[T]) -> None: @@ -53,17 +64,22 @@ def register(self, cls: type[T]) -> None: __pipeline_registry: Registry[CodecPipeline] = Registry() __buffer_registry: Registry[Buffer] = Registry() __ndbuffer_registry: Registry[NDBuffer] = Registry() +__data_type_registry: Registry[BaseDataType] = Registry() +__v3_dtype_registry: Registry[ZarrDType] = Registry() +__v2_dtype_registry: Registry[ZarrDType] = Registry() """ The registry module is responsible for managing implementations of codecs, pipelines, buffers and ndbuffers and collecting them from entrypoints. 
The implementation used is determined by the config. + +The registry module is also responsible for managing dtypes. """ def _collect_entrypoints() -> list[Registry[Any]]: """ - Collects codecs, pipelines, buffers and ndbuffers from entrypoints. + Collects codecs, pipelines, dtypes, buffers and ndbuffers from entrypoints. Entry points can either be single items or groups of items. Allowed syntax for entry_points.txt is e.g. @@ -86,6 +102,14 @@ def _collect_entrypoints() -> list[Registry[Any]]: __buffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="buffer")) __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.ndbuffer")) __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="ndbuffer")) + + __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr.data_type")) + __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="data_type")) + + __v3_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr.v3dtype")) + __v3_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="v3dtype")) + __v2_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr.v2dtype")) + __v2_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="v2dtype")) __pipeline_registry.lazy_load_list.extend(entry_points.select(group="zarr.codec_pipeline")) __pipeline_registry.lazy_load_list.extend( entry_points.select(group="zarr", name="codec_pipeline") @@ -131,6 +155,18 @@ def register_buffer(cls: type[Buffer]) -> None: __buffer_registry.register(cls) +def register_data_type(cls: type[BaseDataType]) -> None: + __data_type_registry.register(cls) + + +def register_v3dtype(cls: type[ZarrDType]) -> None: + __v3_dtype_registry.register(cls) + + +def register_v2dtype(cls: type[ZarrDType]) -> None: + __v2_dtype_registry.register(cls) + + def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: if reload_config: 
_reload_config() @@ -148,7 +184,8 @@ def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: if len(codec_classes) == 1: return next(iter(codec_classes.values())) warnings.warn( - f"Codec '{key}' not configured in config. Selecting any implementation.", stacklevel=2 + f"Codec '{key}' not configured in config. Selecting any implementation.", + stacklevel=2, ) return list(codec_classes.values())[-1] selected_codec_cls = codec_classes[config_entry] @@ -266,4 +303,69 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: ) +def get_data_type(dtype: str) -> type[BaseDataType]: + __data_type_registry.lazy_load() + maybe_dtype_cls = __data_type_registry.get(dtype) + if maybe_dtype_cls is None: + raise ValueError(f"No data type class matching name {dtype}") + return maybe_dtype_cls + + +def get_data_type_from_numpy(dtype: npt.DTypeLike) -> type[BaseDataType]: + np_dtype = np.dtype(dtype) + __data_type_registry.lazy_load() + for val in __data_type_registry.values(): + if val.numpy_character_code == np_dtype.char: + return val + raise ValueError( + f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__data_type_registry)}." + ) + + +# TODO: merge the get_vXdtype_class_ functions +# these can be used instead of the various parse_X functions (hopefully) +def get_v3dtype_class(dtype: str) -> type[ZarrDType]: + __v3_dtype_registry.lazy_load() + v3dtype_class = __v3_dtype_registry.get(dtype) + if v3dtype_class: + return v3dtype_class + raise ValueError( + f"ZarrDType class '{dtype}' not found in registered buffers: {list(__v3_dtype_registry)}." + ) + + +def get_v3dtype_class_from_numpy(dtype: npt.DTypeLike) -> type[ZarrDType]: + __v3_dtype_registry.lazy_load() + + dtype = np.dtype(dtype) + for val in __v3_dtype_registry.values(): + if dtype == val.to_numpy: + return val + raise ValueError( + f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__v3_dtype_registry)}." 
+ ) + + +def get_v2dtype_class(dtype: str) -> type[ZarrDType]: + __v2_dtype_registry.lazy_load() + v2dtype_class = __v2_dtype_registry.get(dtype) + if v2dtype_class: + return v2dtype_class + raise ValueError( + f"ZarrDType class '{dtype}' not found in registered buffers: {list(__v2_dtype_registry)}." + ) + + +def get_v2dtype_class_from_numpy(dtype: npt.DTypeLike) -> type[ZarrDType]: + __v2_dtype_registry.lazy_load() + + dtype = np.dtype(dtype) + for val in __v2_dtype_registry.values(): + if dtype == val.to_numpy: + return val + raise ValueError( + f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__v2_dtype_registry)}." + ) + + _collect_entrypoints() diff --git a/tests/test_array.py b/tests/test_array.py index efcf8a6bf9..72c1bbf1b7 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1008,7 +1008,7 @@ async def test_no_filters_compressors( assert arr.serializer == BytesCodec() @staticmethod - @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) + @pytest.mark.parametrize("dtype", ["uint8", "float32", "str", "U3", "S4", "V1"]) @pytest.mark.parametrize( "compressors", [ diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py index f5599f2ac0..8aeea834ce 100644 --- a/tests/test_codecs/test_vlen.py +++ b/tests/test_codecs/test_vlen.py @@ -8,17 +8,18 @@ from zarr.abc.codec import Codec from zarr.abc.store import Store from zarr.codecs import ZstdCodec -from zarr.core.metadata.v3 import ArrayV3Metadata, DataType +from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING +from zarr.registry import get_data_type_from_numpy from zarr.storage import StorePath -numpy_str_dtypes: list[type | str | None] = [None, str, "str", np.dtypes.StrDType] -expected_zarr_string_dtype: np.dtype[Any] +numpy_str_dtypes: list[type | str | None] = [None, str, "str", np.dtypes.StrDType, "S", "U"] +expected_array_string_dtype: np.dtype[Any] if _NUMPY_SUPPORTS_VLEN_STRING: 
numpy_str_dtypes.append(np.dtypes.StringDType) - expected_zarr_string_dtype = np.dtypes.StringDType() + expected_array_string_dtype = np.dtypes.StringDType() else: - expected_zarr_string_dtype = np.dtype("O") + expected_array_string_dtype = np.dtype("O") @pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) @@ -49,15 +50,15 @@ def test_vlen_string( a[:, :] = data assert np.array_equal(data, a[:, :]) - assert a.metadata.data_type == DataType.string - assert a.dtype == expected_zarr_string_dtype + assert a.metadata.data_type == get_data_type_from_numpy(dtype) + assert a.dtype == expected_array_string_dtype # test round trip b = Array.open(sp) assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy assert np.array_equal(data, b[:, :]) - assert b.metadata.data_type == DataType.string - assert a.dtype == expected_zarr_string_dtype + assert b.metadata.data_type == get_data_type_from_numpy(dtype) + assert a.dtype == expected_array_string_dtype @pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index a47cbf43bb..4f6b2a5de6 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -14,7 +14,6 @@ from zarr.core.group import GroupMetadata, parse_node_type from zarr.core.metadata.v3 import ( ArrayV3Metadata, - DataType, default_fill_value, parse_dimension_names, parse_fill_value, From 5000dcb616aabda90a91e02a8d27bc02ce54f63d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 26 Feb 2025 14:56:13 +0100 Subject: [PATCH 004/130] rename base dtype, change type to kind --- src/zarr/core/_info.py | 4 +- src/zarr/core/array.py | 12 +- src/zarr/core/common.py | 9 +- src/zarr/core/metadata/dtype.py | 192 +++++++++++++++++++------------- src/zarr/core/metadata/v3.py | 8 +- src/zarr/registry.py | 10 +- 6 files changed, 130 insertions(+), 105 deletions(-) diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index 
14eb98d6e4..d2b23d8b5f 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -7,7 +7,7 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.core.common import ZarrFormat -from zarr.core.metadata.dtype import BaseDataType +from zarr.core.metadata.dtype import DtypeBase # from zarr.core.metadata.v3 import DataType @@ -80,7 +80,7 @@ class ArrayInfo: _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat - _data_type: np.dtype[Any] | BaseDataType + _data_type: np.dtype[Any] | DtypeBase _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] | None = None diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 2bb809037d..2e15db3790 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -98,7 +98,7 @@ ArrayV3MetadataDict, T_ArrayMetadata, ) -from zarr.core.metadata.dtype import BaseDataType +from zarr.core.metadata.dtype import DtypeBase from zarr.core.metadata.v2 import ( _default_compressor, _default_filters, @@ -679,7 +679,7 @@ def _create_metadata_v3( """ shape = parse_shapelike(shape) - codecs = list(codecs) if codecs is not None else _get_default_codecs(np.dtype(dtype)) + codecs = list(codecs) if codecs is not None else _get_default_codecs(dtype) chunk_key_encoding_parsed: ChunkKeyEncodingLike if chunk_key_encoding is None: chunk_key_encoding_parsed = {"name": "default", "separator": "/"} @@ -1684,7 +1684,7 @@ async def info_complete(self) -> Any: def _info( self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None ) -> Any: - _data_type: np.dtype[Any] | BaseDataType + _data_type: np.dtype[Any] | DtypeBase if isinstance(self.metadata, ArrayV2Metadata): _data_type = self.metadata.dtype else: @@ -4207,9 +4207,9 @@ def _get_default_chunk_encoding_v3( """ dtype = get_data_type_from_numpy(np_dtype) - default_filters = zarr_config.get("array.v3_default_filters").get(dtype.type) - default_serializer = 
zarr_config.get("array.v3_default_serializer").get(dtype.type) - default_compressors = zarr_config.get("array.v3_default_compressors").get(dtype.type) + default_filters = zarr_config.get("array.v3_default_filters").get(dtype.kind) + default_serializer = zarr_config.get("array.v3_default_serializer").get(dtype.kind) + default_compressors = zarr_config.get("array.v3_default_compressors").get(dtype.kind) filters = tuple(_parse_array_array_codec(codec_dict) for codec_dict in default_filters) serializer = _parse_array_bytes_codec(default_serializer) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index e398eff406..e005cceed0 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -19,7 +19,6 @@ import numpy as np from zarr.core.config import config as zarr_config -from zarr.core.strings import _VLEN_STRING_DTYPE if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterator @@ -167,13 +166,7 @@ def parse_bool(data: Any) -> bool: raise ValueError(f"Expected bool, got {data} instead.") -def parse_dtype(dtype: Any, zarr_format: ZarrFormat) -> np.dtype[Any]: - if dtype is str or dtype == "str": - if zarr_format == 2: - # special case as object - return np.dtype("object") - else: - return _VLEN_STRING_DTYPE +def parse_dtype(dtype: Any) -> np.dtype[Any]: return np.dtype(dtype) diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index ab101f2fad..f3a571b372 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -1,8 +1,9 @@ from abc import ABC -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any, ClassVar, Literal, Self, get_args import numpy as np +import numpy.typing as npt from zarr.abc.metadata import Metadata from zarr.core.common import JSON @@ -28,19 +29,22 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> Literal[">", "<", ) -class BaseDataType(ABC, Metadata): +class Flexible: + capacity: int + + +class 
DtypeBase(ABC, Metadata): name: ClassVar[str] numpy_character_code: ClassVar[str] item_size: ClassVar[int | None] - type: ClassVar[DataTypeFlavor] - capacity: int + kind: ClassVar[DataTypeFlavor] def __init_subclass__(cls, **kwargs: object) -> None: required_attrs = [ "name", "numpy_character_code", "item_size", - "type", + "kind", ] for attr in required_attrs: if not hasattr(cls, attr): @@ -51,18 +55,43 @@ def __init_subclass__(cls, **kwargs: object) -> None: def to_dict(self) -> dict[str, JSON]: return {"name": self.name} + @classmethod + def from_numpy(cls, dtype: npt.DTypeLike) -> Self: + """ + Create an instance of this dtype from a numpy dtype. + + Parameters + ---------- + dtype : npt.DTypeLike + The numpy dtype to create an instance from. + + Returns + ------- + Self + An instance of this dtype. + + Raises + ------ + ValueError + If the provided numpy dtype does not match this class. + """ + if np.dtype(dtype).char != cls.numpy_character_code: + raise ValueError( + f"Invalid dtype {dtype}. Expected dtype with character code == {cls.numpy_character_code}." + ) + return cls() + def to_numpy(self: Self, *, endianness: Endianness | None = None) -> np.dtype[Any]: endian_str = endianness_to_numpy_str(endianness) return np.dtype(endian_str + self.numpy_character_code) @dataclass(frozen=True, kw_only=True) -class Bool(BaseDataType): +class Bool(DtypeBase): name = "bool" item_size = 1 - type = "boolean" + kind = "boolean" numpy_character_code = "?" 
- capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.BoolDType: return super().to_numpy(endianness=endianness) @@ -72,12 +101,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.BoolDTy @dataclass(frozen=True, kw_only=True) -class Int8(BaseDataType): +class Int8(DtypeBase): name = "int8" item_size = 1 - type = "numeric" + kind = "numeric" numpy_character_code = "b" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int8DType: return super().to_numpy(endianness=endianness) @@ -87,12 +115,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int8DTy @dataclass(frozen=True, kw_only=True) -class UInt8(BaseDataType): +class UInt8(DtypeBase): name = "uint8" item_size = 2 - type = "numeric" + kind = "numeric" numpy_character_code = "B" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt8DType: return super().to_numpy(endianness=endianness) @@ -102,12 +129,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt8DT @dataclass(frozen=True, kw_only=True) -class Int16(BaseDataType): +class Int16(DtypeBase): name = "int16" item_size = 2 - type = "numeric" + kind = "numeric" numpy_character_code = "h" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int16DType: return super().to_numpy(endianness=endianness) @@ -117,12 +143,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int16DT @dataclass(frozen=True, kw_only=True) -class UInt16(BaseDataType): +class UInt16(DtypeBase): name = "uint16" item_size = 2 - type = "numeric" + kind = "numeric" numpy_character_code = "H" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt16DType: 
return super().to_numpy(endianness=endianness) @@ -132,12 +157,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt16D @dataclass(frozen=True, kw_only=True) -class Int32(BaseDataType): +class Int32(DtypeBase): name = "int32" item_size = 4 - type = "numeric" + kind = "numeric" numpy_character_code = "i" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int32DType: return super().to_numpy(endianness=endianness) @@ -147,12 +171,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int32DT @dataclass(frozen=True, kw_only=True) -class UInt32(BaseDataType): +class UInt32(DtypeBase): name = "uint32" item_size = 4 - type = "numeric" + kind = "numeric" numpy_character_code = "I" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt32DType: return super().to_numpy(endianness=endianness) @@ -162,12 +185,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt32D @dataclass(frozen=True, kw_only=True) -class Int64(BaseDataType): +class Int64(DtypeBase): name = "int64" item_size = 8 - type = "numeric" + kind = "numeric" numpy_character_code = "l" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int64DType: return super().to_numpy(endianness=endianness) @@ -177,12 +199,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int64DT @dataclass(frozen=True, kw_only=True) -class UInt64(BaseDataType): +class UInt64(DtypeBase): name = "uint64" item_size = 8 - type = "numeric" + kind = "numeric" numpy_character_code = "L" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt64DType: return super().to_numpy(endianness=endianness) @@ -192,12 +213,11 @@ def to_numpy(self, *, endianness: Endianness | None = 
None) -> np.dtypes.UInt64D @dataclass(frozen=True, kw_only=True) -class Float16(BaseDataType): +class Float16(DtypeBase): name = "float16" item_size = 2 - type = "numeric" + kind = "numeric" numpy_character_code = "e" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float16DType: return super().to_numpy(endianness=endianness) @@ -207,12 +227,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float16 @dataclass(frozen=True, kw_only=True) -class Float32(BaseDataType): +class Float32(DtypeBase): name = "float32" item_size = 4 - type = "numeric" + kind = "numeric" numpy_character_code = "f" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float32DType: return super().to_numpy(endianness=endianness) @@ -222,12 +241,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float32 @dataclass(frozen=True, kw_only=True) -class Float64(BaseDataType): +class Float64(DtypeBase): name = "float64" item_size = 8 - type = "numeric" + kind = "numeric" numpy_character_code = "d" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float64DType: return super().to_numpy(endianness=endianness) @@ -237,12 +255,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float64 @dataclass(frozen=True, kw_only=True) -class Complex64(BaseDataType): +class Complex64(DtypeBase): name = "complex64" item_size = 16 - type = "numeric" + kind = "numeric" numpy_character_code = "F" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex64DType: return super().to_numpy(endianness=endianness) @@ -252,12 +269,11 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex @dataclass(frozen=True, kw_only=True) -class 
Complex128(BaseDataType): +class Complex128(DtypeBase): name = "complex64" item_size = 32 - type = "numeric" + kind = "numeric" numpy_character_code = "D" - capacity: int = field(default=1, init=False) def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex128DType: return super().to_numpy(endianness=endianness) @@ -267,12 +283,17 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex @dataclass(frozen=True, kw_only=True) -class StaticByteString(BaseDataType): +class StaticByteString(DtypeBase, Flexible): name = "numpy/static_byte_string" - type = "string" + kind = "string" numpy_character_code = "S" item_size = 1 - capacity: int + + def from_numpy(cls: type[Self], dtype: npt.DTypeLike) -> Self: + dtype = np.dtype(dtype) + if dtype.kind != cls.numpy_character_code: + raise ValueError(f"Invalid dtype {dtype}. Expected a string dtype.") + return cls(capacity=dtype.itemsize) def to_dict(self) -> dict[str, JSON]: return {"name": self.name, "configuration": {"capacity": self.capacity}} @@ -282,20 +303,42 @@ def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.byte return np.dtype(endianness_code + self.numpy_character_code + str(self.capacity)) +@dataclass(frozen=True, kw_only=True) +class StaticRawBytes(DtypeBase, Flexible): + name = "r*" + kind = "bytes" + numpy_character_code = "V" + item_size = 1 + + def from_numpy(cls: type[Self], dtype: npt.DTypeLike) -> Self: + dtype = np.dtype(dtype) + if dtype.kind != "V": + raise ValueError(f"Invalid dtype {dtype}. 
Expected a bytes dtype.") + return cls(capacity=dtype.itemsize) + + def to_dict(self) -> dict[str, JSON]: + return {"name": f"r{self.capacity * 8}"} + + def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.void]: + endianness_code = endianness_to_numpy_str(endianness) + return np.dtype(endianness_code + self.numpy_character_code + str(self.capacity)) + + register_data_type(StaticByteString) if _NUMPY_SUPPORTS_VLEN_STRING: @dataclass(frozen=True, kw_only=True) - class VlenString(BaseDataType): + class VlenString(DtypeBase): name = "numpy/vlen_string" - type = "string" + kind = "string" numpy_character_code = "T" + # this uses UTF-8, so the encoding of a code point varies between + # 1 and 4 bytes item_size = None - capacity: int def to_dict(self) -> dict[str, JSON]: - return {"name": self.name, "configuration": {"capacity": self.capacity}} + return {"name": self.name} def to_numpy( self, endianness: Endianness | None = "native" @@ -306,15 +349,14 @@ def to_numpy( else: @dataclass(frozen=True, kw_only=True) - class VlenString(BaseDataType): + class VlenString(DtypeBase): name = "numpy/vlen_string" - type = "string" + kind = "string" numpy_character_code = "O" item_size = None - capacity: int def to_dict(self) -> dict[str, JSON]: - return {"name": self.name, "configuration": {"capacity": self.capacity}} + return {"name": self.name} def to_numpy( self, endianness: Endianness | None = "native" @@ -327,12 +369,17 @@ def to_numpy( @dataclass(frozen=True, kw_only=True) -class StaticUnicodeString(BaseDataType): +class StaticUnicodeString(DtypeBase, Flexible): name = "numpy/static_unicode_string" - type = "string" + kind = "string" numpy_character_code = "U" item_size = 4 - capacity: int + + def from_numpy(cls: type[Self], dtype: npt.DTypeLike) -> Self: + dtype = np.dtype(dtype) + if dtype.kind != "U": + raise ValueError(f"Invalid dtype {dtype}. 
Expected a string dtype.") + return cls(capacity=dtype.itemsize) def to_dict(self) -> dict[str, JSON]: return {"name": self.name, "configuration": {"capacity": self.capacity}} @@ -345,28 +392,13 @@ def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.str_ register_data_type(StaticUnicodeString) -@dataclass(frozen=True, kw_only=True) -class StaticRawBytes(BaseDataType): - name = "r*" - type = "bytes" - numpy_character_code = "V" - item_size = 1 - capacity: int - - def to_dict(self) -> dict[str, JSON]: - return {"name": f"r{self.capacity * 8}"} - - def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.void]: - endianness_code = endianness_to_numpy_str(endianness) - return np.dtype(endianness_code + self.numpy_character_code + str(self.capacity)) - - -def parse_dtype(dtype: npt.DtypeLike | BaseDataType) -> BaseDataType: +def resolve_dtype(dtype: npt.DTypeLike | DtypeBase) -> DtypeBase: from zarr.registry import get_data_type_from_numpy - if isinstance(dtype, BaseDataType): + if isinstance(dtype, DtypeBase): return dtype - return get_data_type_from_numpy(dtype) + cls = get_data_type_from_numpy(dtype) + return cls.from_numpy(dtype) register_data_type(StaticRawBytes) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 86503e64cd..839459d8e0 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -5,7 +5,7 @@ from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.metadata.dtype import BaseDataType +from zarr.core.metadata.dtype import DtypeBase, resolve_dtype if TYPE_CHECKING: from collections.abc import Callable @@ -237,7 +237,7 @@ class ArrayV3MetadataDict(TypedDict): @dataclass(frozen=True, kw_only=True) class ArrayV3Metadata(Metadata): shape: ChunkCoords - data_type: DataType + data_type: DtypeBase chunk_grid: ChunkGrid chunk_key_encoding: ChunkKeyEncoding fill_value: Any @@ -252,7 +252,7 @@ def __init__( self, *, 
shape: Iterable[int], - data_type: npt.DTypeLike | BaseDataType, + data_type: npt.DTypeLike | DtypeBase, chunk_grid: dict[str, JSON] | ChunkGrid, chunk_key_encoding: ChunkKeyEncodingLike, fill_value: Any, @@ -265,7 +265,7 @@ def __init__( Because the class is a frozen dataclass, we set attributes using object.__setattr__ """ shape_parsed = parse_shapelike(shape) - data_type_parsed = DataType.parse(data_type) + data_type_parsed = resolve_dtype(data_type) chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 480d75d49a..272a72a16f 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -24,7 +24,7 @@ from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON from zarr.core.dtype import ZarrDType - from zarr.core.metadata.dtype import BaseDataType + from zarr.core.metadata.dtype import DtypeBase __all__ = [ "Registry", @@ -64,7 +64,7 @@ def register(self, cls: type[T]) -> None: __pipeline_registry: Registry[CodecPipeline] = Registry() __buffer_registry: Registry[Buffer] = Registry() __ndbuffer_registry: Registry[NDBuffer] = Registry() -__data_type_registry: Registry[BaseDataType] = Registry() +__data_type_registry: Registry[DtypeBase] = Registry() __v3_dtype_registry: Registry[ZarrDType] = Registry() __v2_dtype_registry: Registry[ZarrDType] = Registry() @@ -155,7 +155,7 @@ def register_buffer(cls: type[Buffer]) -> None: __buffer_registry.register(cls) -def register_data_type(cls: type[BaseDataType]) -> None: +def register_data_type(cls: type[DtypeBase]) -> None: __data_type_registry.register(cls) @@ -303,7 +303,7 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: ) -def get_data_type(dtype: str) -> type[BaseDataType]: +def get_data_type(dtype: str) -> type[DtypeBase]: __data_type_registry.lazy_load() maybe_dtype_cls = 
__data_type_registry.get(dtype) if maybe_dtype_cls is None: @@ -311,7 +311,7 @@ def get_data_type(dtype: str) -> type[BaseDataType]: return maybe_dtype_cls -def get_data_type_from_numpy(dtype: npt.DTypeLike) -> type[BaseDataType]: +def get_data_type_from_numpy(dtype: npt.DTypeLike) -> type[DtypeBase]: np_dtype = np.dtype(dtype) __data_type_registry.lazy_load() for val in __data_type_registry.values(): From 9cd5c5197b869110139b8e922b54c29bc9b5b425 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 27 Feb 2025 09:57:23 +0100 Subject: [PATCH 005/130] start working on JSON serialization --- src/zarr/core/_info.py | 4 +- src/zarr/core/array.py | 6 +- src/zarr/core/metadata/dtype.py | 363 ++++++++++++++++++++++++++------ src/zarr/core/metadata/v3.py | 207 +++--------------- src/zarr/registry.py | 10 +- 5 files changed, 343 insertions(+), 247 deletions(-) diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index d2b23d8b5f..2ede547600 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -7,7 +7,7 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.core.common import ZarrFormat -from zarr.core.metadata.dtype import DtypeBase +from zarr.core.metadata.dtype import DTypeBase # from zarr.core.metadata.v3 import DataType @@ -80,7 +80,7 @@ class ArrayInfo: _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat - _data_type: np.dtype[Any] | DtypeBase + _data_type: np.dtype[Any] | DTypeBase _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] 
| None = None diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 2e15db3790..2986b27fb0 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -98,7 +98,7 @@ ArrayV3MetadataDict, T_ArrayMetadata, ) -from zarr.core.metadata.dtype import DtypeBase +from zarr.core.metadata.dtype import DTypeBase from zarr.core.metadata.v2 import ( _default_compressor, _default_filters, @@ -1684,7 +1684,7 @@ async def info_complete(self) -> Any: def _info( self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None ) -> Any: - _data_type: np.dtype[Any] | DtypeBase + _data_type: np.dtype[Any] | DTypeBase if isinstance(self.metadata, ArrayV2Metadata): _data_type = self.metadata.dtype else: @@ -3909,7 +3909,7 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation - dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) + dtype_parsed = parse_dtype(dtype) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index f3a571b372..19a00343c8 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -1,17 +1,18 @@ -from abc import ABC +from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Any, ClassVar, Literal, Self, get_args +from typing import Any, ClassVar, Literal, Self, TypeGuard, cast, get_args import numpy as np import numpy.typing as npt from zarr.abc.metadata import Metadata -from zarr.core.common import JSON +from zarr.core.common import JSON, ZarrFormat from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING from zarr.registry import register_data_type Endianness = Literal["little", "big", "native"] DataTypeFlavor = Literal["boolean", "numeric", "string", "bytes"] +JSONFloat = float | Literal["NaN", "Infinity", "-Infinity"] def 
endianness_to_numpy_str(endianness: Endianness | None) -> Literal[">", "<", "=", "|"]: @@ -29,23 +30,121 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> Literal[">", "<", ) +def check_json_bool(data: JSON) -> TypeGuard[bool]: + return bool(isinstance(data, bool)) + + +def check_json_int(data: JSON) -> TypeGuard[int]: + return bool(isinstance(data, int)) + + +def check_json_float(data: JSON) -> TypeGuard[float]: + if data == "NaN" or data == "Infinity" or data == "-Infinity": + return True + else: + return bool(isinstance(data, float)) + + +def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: + if np.isnan(data): + return "NaN" + elif np.isinf(data): + return "Infinity" if data > 0 else "-Infinity" + return float(data) + + +def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: + # v3 can in principle handle distinct NaN values, but numpy does not represent these explicitly + # so we just re-use the v2 routine here + return float_to_json_v2(data) + + +def float_to_json(data: float | np.floating[Any], zarr_format: ZarrFormat) -> JSONFloat: + """ + convert a float to JSON as per the zarr v3 spec + """ + if zarr_format == 2: + return float_to_json_v2(data) + else: + return float_to_json_v3(data) + raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + + +def complex_to_json_v2(data: complex | np.complex_) -> JSONFloat: + return float_to_json_v2(data) + + +def complex_to_json_v3(data: complex | np.complex_) -> tuple[JSONFloat, JSONFloat]: + return float_to_json_v3(data.real), float_to_json_v3(data.imag) + + +def complex_to_json( + data: complex | np.complex_, zarr_format: ZarrFormat +) -> tuple[JSONFloat, JSONFloat] | JSONFloat: + if zarr_format == 2: + return complex_to_json_v2(data) + else: + return complex_to_json_v3(data) + raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") + + +def float_from_json_v2(data: JSONFloat, dtype: np.floating[Any]) -> np.float_: + if data == "NaN": + _data = np.nan + elif data == "Infinity": + _data = np.inf + elif data == "-Infinity": + _data = -np.inf + else: + _data = data + return dtype.type(_data) + + +def float_from_json_v3(data: JSONFloat, dtype: Any) -> np.floating[Any]: + # todo: support the v3-specific NaN handling + return float_from_json_v2(data, dtype) + + +def float_from_json(data: JSONFloat, dtype: Any, zarr_format: ZarrFormat) -> np.floating[Any]: + if zarr_format == 2: + return float_from_json_v2(data, dtype) + else: + return float_from_json_v3(data, dtype) + raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + + +def complex_from_json_v2(data: JSONFloat, dtype: Any) -> np.complex_: + return dtype.type(data) + + +def complex_from_json_v3(data: tuple[JSONFloat, JSONFloat], dtype: Any) -> np.complex_: + return dtype.type(data[0] + 1j * data[1]) + + +def complex_from_json( + data: tuple[JSONFloat, JSONFloat], dtype: Any, zarr_format: ZarrFormat +) -> np.complex_: + if zarr_format == 2: + return complex_from_json_v2(data, dtype) + else: + return complex_from_json_v3(data, dtype) + raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") + + +@dataclass(frozen=True, kw_only=True) class Flexible: - capacity: int + length: int -class DtypeBase(ABC, Metadata): +class DTypeBase(ABC, Metadata): name: ClassVar[str] numpy_character_code: ClassVar[str] item_size: ClassVar[int | None] kind: ClassVar[DataTypeFlavor] + default: object def __init_subclass__(cls, **kwargs: object) -> None: - required_attrs = [ - "name", - "numpy_character_code", - "item_size", - "kind", - ] + required_attrs = ["name", "numpy_character_code", "item_size", "kind", "default"] for attr in required_attrs: if not hasattr(cls, attr): raise ValueError(f"{attr} is a required attribute for a Zarr dtype.") @@ -57,223 +156,356 @@ def to_dict(self) -> dict[str, JSON]: @classmethod def from_numpy(cls, dtype: npt.DTypeLike) -> Self: - """ - Create an instance of this dtype from a numpy dtype. - - Parameters - ---------- - dtype : npt.DTypeLike - The numpy dtype to create an instance from. - - Returns - ------- - Self - An instance of this dtype. - - Raises - ------ - ValueError - If the provided numpy dtype does not match this class. - """ if np.dtype(dtype).char != cls.numpy_character_code: raise ValueError( f"Invalid dtype {dtype}. Expected dtype with character code == {cls.numpy_character_code}." ) return cls() + def default_value(self: Self, *, endianness: Endianness | None = None) -> np.generic: + return cast(np.generic, self.to_numpy(endianness=endianness).type(self.default)) + def to_numpy(self: Self, *, endianness: Endianness | None = None) -> np.dtype[Any]: endian_str = endianness_to_numpy_str(endianness) return np.dtype(endian_str + self.numpy_character_code) + @abstractmethod + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> JSON: + """ + Convert a single value to JSON-serializable format. Depends on the zarr format. 
+ """ + raise NotImplementedError + + @abstractmethod + def from_json_value( + self: Self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.generic: + """ + Read a JSON-serializable value as a numpy scalar + """ + raise NotImplementedError + @dataclass(frozen=True, kw_only=True) -class Bool(DtypeBase): +class Bool(DTypeBase): name = "bool" item_size = 1 kind = "boolean" numpy_character_code = "?" + default = False def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.BoolDType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> bool: + return bool(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.bool_: + if check_json_bool(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. Expected a boolean.") + register_data_type(Bool) @dataclass(frozen=True, kw_only=True) -class Int8(DtypeBase): +class Int8(DTypeBase): name = "int8" item_size = 1 kind = "numeric" numpy_character_code = "b" + default = 0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int8DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> int: + return int(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.int8: + if check_json_int(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + register_data_type(Int8) @dataclass(frozen=True, kw_only=True) -class UInt8(DtypeBase): +class UInt8(DTypeBase): name = "uint8" item_size = 2 kind = "numeric" numpy_character_code = "B" + default = 0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt8DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> int: + return int(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.uint8: + if check_json_int(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + register_data_type(UInt8) @dataclass(frozen=True, kw_only=True) -class Int16(DtypeBase): +class Int16(DTypeBase): name = "int16" item_size = 2 kind = "numeric" numpy_character_code = "h" + default = 0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int16DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> int: + return int(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.int16: + if check_json_int(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + register_data_type(Int16) @dataclass(frozen=True, kw_only=True) -class UInt16(DtypeBase): +class UInt16(DTypeBase): name = "uint16" item_size = 2 kind = "numeric" numpy_character_code = "H" + default = 0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt16DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> int: + return int(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.uint16: + if check_json_int(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + register_data_type(UInt16) @dataclass(frozen=True, kw_only=True) -class Int32(DtypeBase): +class Int32(DTypeBase): name = "int32" item_size = 4 kind = "numeric" numpy_character_code = "i" + default = 0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int32DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> int: + return int(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.int32: + if check_json_int(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + register_data_type(Int32) @dataclass(frozen=True, kw_only=True) -class UInt32(DtypeBase): +class UInt32(DTypeBase): name = "uint32" item_size = 4 kind = "numeric" numpy_character_code = "I" + default = 0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt32DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> int: + return int(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.uint32: + if check_json_int(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + register_data_type(UInt32) @dataclass(frozen=True, kw_only=True) -class Int64(DtypeBase): +class Int64(DTypeBase): name = "int64" item_size = 8 kind = "numeric" numpy_character_code = "l" + default = 0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int64DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.int64: + if check_json_int(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + register_data_type(Int64) @dataclass(frozen=True, kw_only=True) -class UInt64(DtypeBase): +class UInt64(DTypeBase): name = "uint64" item_size = 8 kind = "numeric" numpy_character_code = "L" + default = 0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt64DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.uint64: + if check_json_int(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + register_data_type(UInt64) @dataclass(frozen=True, kw_only=True) -class Float16(DtypeBase): +class Float16(DTypeBase): name = "float16" item_size = 2 kind = "numeric" numpy_character_code = "e" + default = 0.0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float16DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> float: + return float(data) + + def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.float16: + if check_json_float(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. Expected a float.") + register_data_type(Float16) @dataclass(frozen=True, kw_only=True) -class Float32(DtypeBase): +class Float32(DTypeBase): name = "float32" item_size = 4 kind = "numeric" numpy_character_code = "f" + default = 0.0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float32DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> float: + return float(data) + + def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.float32: + if check_json_float(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. 
Expected a float.") + register_data_type(Float32) @dataclass(frozen=True, kw_only=True) -class Float64(DtypeBase): +class Float64(DTypeBase): name = "float64" item_size = 8 kind = "numeric" numpy_character_code = "d" + default = 0.0 def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float64DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> float: + return float(data) + + def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.float64: + if check_json_float(data): + return float_from_json(data, dtype=self.to_numpy(endianness=endianness)) + raise TypeError(f"Invalid type: {data}. Expected a float.") + register_data_type(Float64) @dataclass(frozen=True, kw_only=True) -class Complex64(DtypeBase): +class Complex64(DTypeBase): name = "complex64" item_size = 16 kind = "numeric" numpy_character_code = "F" + default = 0.0 + 0.0j def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex64DType: return super().to_numpy(endianness=endianness) + def to_json_value(self, data: np.generic) -> float: + return float(data) + + def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.complex64: + if check_json_float(data): + return self.to_numpy(endianness=endianness).type(data) + raise TypeError(f"Invalid type: {data}. 
Expected a float.") + register_data_type(Complex64) @dataclass(frozen=True, kw_only=True) -class Complex128(DtypeBase): +class Complex128(DTypeBase): name = "complex64" item_size = 32 kind = "numeric" numpy_character_code = "D" + default = 0.0 + 0.0j def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex128DType: return super().to_numpy(endianness=endianness) @@ -283,45 +515,49 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex @dataclass(frozen=True, kw_only=True) -class StaticByteString(DtypeBase, Flexible): +class StaticByteString(DTypeBase, Flexible): name = "numpy/static_byte_string" kind = "string" numpy_character_code = "S" item_size = 1 + default = b"" + @classmethod def from_numpy(cls: type[Self], dtype: npt.DTypeLike) -> Self: dtype = np.dtype(dtype) if dtype.kind != cls.numpy_character_code: raise ValueError(f"Invalid dtype {dtype}. Expected a string dtype.") - return cls(capacity=dtype.itemsize) + return cls(length=dtype.itemsize) def to_dict(self) -> dict[str, JSON]: - return {"name": self.name, "configuration": {"capacity": self.capacity}} + return {"name": self.name, "configuration": {"capacity": self.length}} def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.bytes_]: endianness_code = endianness_to_numpy_str(endianness) - return np.dtype(endianness_code + self.numpy_character_code + str(self.capacity)) + return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) @dataclass(frozen=True, kw_only=True) -class StaticRawBytes(DtypeBase, Flexible): +class StaticRawBytes(DTypeBase, Flexible): name = "r*" kind = "bytes" numpy_character_code = "V" item_size = 1 + default = b"" + @classmethod def from_numpy(cls: type[Self], dtype: npt.DTypeLike) -> Self: dtype = np.dtype(dtype) if dtype.kind != "V": raise ValueError(f"Invalid dtype {dtype}. 
Expected a bytes dtype.") - return cls(capacity=dtype.itemsize) + return cls(length=dtype.itemsize) def to_dict(self) -> dict[str, JSON]: - return {"name": f"r{self.capacity * 8}"} + return {"name": f"r{self.length * 8}"} def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.void]: endianness_code = endianness_to_numpy_str(endianness) - return np.dtype(endianness_code + self.numpy_character_code + str(self.capacity)) + return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) register_data_type(StaticByteString) @@ -329,13 +565,14 @@ def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.void if _NUMPY_SUPPORTS_VLEN_STRING: @dataclass(frozen=True, kw_only=True) - class VlenString(DtypeBase): + class VlenString(DTypeBase): name = "numpy/vlen_string" kind = "string" numpy_character_code = "T" # this uses UTF-8, so the encoding of a code point varies between # 1 and 4 bytes item_size = None + default = "" def to_dict(self) -> dict[str, JSON]: return {"name": self.name} @@ -349,11 +586,12 @@ def to_numpy( else: @dataclass(frozen=True, kw_only=True) - class VlenString(DtypeBase): + class VlenString(DTypeBase): name = "numpy/vlen_string" kind = "string" numpy_character_code = "O" item_size = None + default = "" def to_dict(self) -> dict[str, JSON]: return {"name": self.name} @@ -369,36 +607,43 @@ def to_numpy( @dataclass(frozen=True, kw_only=True) -class StaticUnicodeString(DtypeBase, Flexible): +class StaticUnicodeString(DTypeBase, Flexible): name = "numpy/static_unicode_string" kind = "string" numpy_character_code = "U" item_size = 4 + default = "" + @classmethod def from_numpy(cls: type[Self], dtype: npt.DTypeLike) -> Self: dtype = np.dtype(dtype) if dtype.kind != "U": raise ValueError(f"Invalid dtype {dtype}. 
Expected a string dtype.") - return cls(capacity=dtype.itemsize) + return cls(length=dtype.itemsize) def to_dict(self) -> dict[str, JSON]: - return {"name": self.name, "configuration": {"capacity": self.capacity}} + return {"name": self.name, "configuration": {"capacity": self.length}} def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.str_]: endianness_code = endianness_to_numpy_str(endianness) - return np.dtype(endianness_code + self.numpy_character_code + str(self.capacity)) + return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) register_data_type(StaticUnicodeString) -def resolve_dtype(dtype: npt.DTypeLike | DtypeBase) -> DtypeBase: +def resolve_dtype(dtype: npt.DTypeLike | DTypeBase) -> DTypeBase: from zarr.registry import get_data_type_from_numpy - if isinstance(dtype, DtypeBase): + if isinstance(dtype, DTypeBase): return dtype cls = get_data_type_from_numpy(dtype) return cls.from_numpy(dtype) register_data_type(StaticRawBytes) + +INTEGER_DTYPE = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 +FLOAT_DTYPE = Float16 | Float32 | Float64 +COMPLEX_DTYPE = Complex64 | Complex128 +STRING_DTYPE = StaticUnicodeString | VlenString | StaticByteString diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 839459d8e0..87bb001164 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -5,7 +5,16 @@ from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.metadata.dtype import DtypeBase, resolve_dtype +from zarr.core.metadata.dtype import ( + COMPLEX_DTYPE, + FLOAT_DTYPE, + INTEGER_DTYPE, + STRING_DTYPE, + Bool, + DTypeBase, + StaticRawBytes, + resolve_dtype, +) if TYPE_CHECKING: from collections.abc import Callable @@ -19,7 +28,7 @@ from collections.abc import Iterable, Sequence from dataclasses import dataclass, field, replace from enum import Enum -from typing import Any, Literal, cast +from typing 
import Any, Literal import numcodecs.abc import numpy as np @@ -38,8 +47,6 @@ ) from zarr.core.config import config from zarr.core.metadata.common import parse_attributes -from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING -from zarr.core.strings import _VLEN_STRING_DTYPE as STRING_NP_DTYPE from zarr.errors import MetadataValidationError, NodeTypeValidationError from zarr.registry import get_codec_class @@ -94,7 +101,7 @@ def validate_array_bytes_codec(codecs: tuple[Codec, ...]) -> ArrayBytesCodec: return abcs[0] -def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None: +def validate_codecs(codecs: tuple[Codec, ...], dtype: DTypeBase) -> None: """Check that the codecs are valid for the given dtype""" from zarr.codecs.sharding import ShardingCodec @@ -107,11 +114,11 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: DataType) -> None: # we need to have special codecs if we are decoding vlen strings or bytestrings # TODO: use codec ID instead of class name codec_class_name = abc.__class__.__name__ - if dtype == DataType.string and not codec_class_name == "VLenUTF8Codec": + if dtype.kind == "string" and not codec_class_name == "VLenUTF8Codec": raise ValueError( f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`." ) - if dtype == DataType.bytes and not codec_class_name == "VLenBytesCodec": + if dtype.kind == "bytes" and not codec_class_name == "VLenBytesCodec": raise ValueError( f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_class_name}`." 
) @@ -237,7 +244,7 @@ class ArrayV3MetadataDict(TypedDict): @dataclass(frozen=True, kw_only=True) class ArrayV3Metadata(Metadata): shape: ChunkCoords - data_type: DtypeBase + data_type: DTypeBase chunk_grid: ChunkGrid chunk_key_encoding: ChunkKeyEncoding fill_value: Any @@ -252,10 +259,10 @@ def __init__( self, *, shape: Iterable[int], - data_type: npt.DTypeLike | DtypeBase, + data_type: npt.DTypeLike | DTypeBase, chunk_grid: dict[str, JSON] | ChunkGrid, chunk_key_encoding: ChunkKeyEncodingLike, - fill_value: Any, + fill_value: object, codecs: Iterable[Codec | dict[str, JSON]], attributes: dict[str, JSON] | None, dimension_names: Iterable[str] | None, @@ -269,12 +276,8 @@ def __init__( chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) - if fill_value is None: - fill_value = default_fill_value(data_type_parsed) # we pass a string here rather than an enum to make mypy happy - fill_value_parsed = parse_fill_value( - fill_value, dtype=cast(ALL_DTYPES, data_type_parsed.value) - ) + fill_value_parsed = parse_fill_value(fill_value, data_type_parsed) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) storage_transformers_parsed = parse_storage_transformers(storage_transformers) @@ -433,26 +436,19 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self: # enum Literals can't be used in typing, so we have to restate all of the V3 dtypes as types # https://github.com/python/typing/issues/781 -BOOL_DTYPE = Literal["bool"] BOOL = np.bool_ -INTEGER_DTYPE = Literal["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64"] INTEGER = np.int8 | np.int16 | np.int32 | np.int64 | np.uint8 | np.uint16 | np.uint32 | np.uint64 -FLOAT_DTYPE = Literal["float16", "float32", "float64"] FLOAT = np.float16 | np.float32 | np.float64 -COMPLEX_DTYPE = Literal["complex64", "complex128"] 
COMPLEX = np.complex64 | np.complex128 -STRING_DTYPE = Literal["string"] + STRING = np.str_ -BYTES_DTYPE = Literal["bytes"] BYTES = np.bytes_ -ALL_DTYPES = BOOL_DTYPE | INTEGER_DTYPE | FLOAT_DTYPE | COMPLEX_DTYPE | STRING_DTYPE | BYTES_DTYPE - @overload def parse_fill_value( fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, - dtype: BOOL_DTYPE, + dtype: Bool, ) -> BOOL: ... @@ -487,14 +483,14 @@ def parse_fill_value( @overload def parse_fill_value( fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, - dtype: BYTES_DTYPE, + dtype: StaticRawBytes, ) -> BYTES: ... def parse_fill_value( fill_value: Any, - dtype: ALL_DTYPES, -) -> Any: + dtype: DTypeBase, +) -> np.generic: """ Parse `fill_value`, a potential fill value, into an instance of `dtype`, a data type. If `fill_value` is `None`, then this function will return the result of casting the value 0 @@ -508,26 +504,26 @@ def parse_fill_value( ---------- fill_value : Any A potential fill value. - dtype : str + dtype : DTypeBase A valid Zarr format 3 DataType. 
Returns ------- A scalar instance of `dtype` """ - data_type = DataType(dtype) if fill_value is None: raise ValueError("Fill value cannot be None") - if data_type == DataType.string: + + if dtype.kind == "string": return np.str_(fill_value) - if data_type == DataType.bytes: + if dtype.kind == "bytes": return np.bytes_(fill_value) # the rest are numeric types - np_dtype = cast(np.dtype[Any], data_type.to_numpy()) + np_dtype = dtype.to_numpy() if isinstance(fill_value, Sequence) and not isinstance(fill_value, str): - if data_type in (DataType.complex64, DataType.complex128): + if isindata_type in (DataType.complex64, DataType.complex128): if len(fill_value) == 2: decoded_fill_value = tuple( SPECIAL_FLOATS_ENCODED.get(value, value) for value in fill_value @@ -579,148 +575,3 @@ def parse_fill_value( raise ValueError(f"fill value {fill_value!r} is not valid for dtype {data_type}") return casted_value - - -def default_fill_value(dtype: DataType) -> str | bytes | np.generic: - if dtype == DataType.string: - return "" - elif dtype == DataType.bytes: - return b"" - else: - np_dtype = dtype.to_numpy() - np_dtype = cast(np.dtype[Any], np_dtype) - return np_dtype.type(0) # type: ignore[misc] - - -# For type checking -_bool = bool - - -class DataTypex(Enum): - bool = "bool" - int8 = "int8" - int16 = "int16" - int32 = "int32" - int64 = "int64" - uint8 = "uint8" - uint16 = "uint16" - uint32 = "uint32" - uint64 = "uint64" - float16 = "float16" - float32 = "float32" - float64 = "float64" - complex64 = "complex64" - complex128 = "complex128" - string = "string" - bytes = "bytes" - - @property - def byte_count(self) -> int | None: - data_type_byte_counts = { - DataType.bool: 1, - DataType.int8: 1, - DataType.int16: 2, - DataType.int32: 4, - DataType.int64: 8, - DataType.uint8: 1, - DataType.uint16: 2, - DataType.uint32: 4, - DataType.uint64: 8, - DataType.float16: 2, - DataType.float32: 4, - DataType.float64: 8, - DataType.complex64: 8, - DataType.complex128: 16, - } - try: - return 
data_type_byte_counts[self] - except KeyError: - # string and bytes have variable length - return None - - @property - def has_endianness(self) -> _bool: - return self.byte_count is not None and self.byte_count != 1 - - def to_numpy_shortname(self) -> str: - data_type_to_numpy = { - DataType.bool: "bool", - DataType.int8: "i1", - DataType.int16: "i2", - DataType.int32: "i4", - DataType.int64: "i8", - DataType.uint8: "u1", - DataType.uint16: "u2", - DataType.uint32: "u4", - DataType.uint64: "u8", - DataType.float16: "f2", - DataType.float32: "f4", - DataType.float64: "f8", - DataType.complex64: "c8", - DataType.complex128: "c16", - } - return data_type_to_numpy[self] - - def to_numpy(self) -> np.dtypes.StringDType | np.dtypes.ObjectDType | np.dtype[Any]: - # note: it is not possible to round trip DataType <-> np.dtype - # due to the fact that DataType.string and DataType.bytes both - # generally return np.dtype("O") from this function, even though - # they can originate as fixed-length types (e.g. 
" DataType: - if dtype.kind in "UT": - return DataType.string - elif dtype.kind == "S": - return DataType.bytes - elif not _NUMPY_SUPPORTS_VLEN_STRING and dtype.kind == "O": - # numpy < 2.0 does not support vlen string dtype - # so we fall back on object array of strings - return DataType.string - dtype_to_data_type = { - "|b1": "bool", - "bool": "bool", - "|i1": "int8", - " DataType: - if dtype is None: - return DataType[DEFAULT_DTYPE] - if isinstance(dtype, DataType): - return dtype - try: - return DataType(dtype) - except ValueError: - pass - try: - dtype = np.dtype(dtype) - except (ValueError, TypeError) as e: - raise ValueError(f"Invalid Zarr format 3 data_type: {dtype}") from e - # check that this is a valid v3 data_type - try: - data_type = DataType.from_numpy(dtype) - except KeyError as e: - raise ValueError(f"Invalid Zarr format 3 data_type: {dtype}") from e - return data_type diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 272a72a16f..db2effaa76 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -24,7 +24,7 @@ from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON from zarr.core.dtype import ZarrDType - from zarr.core.metadata.dtype import DtypeBase + from zarr.core.metadata.dtype import DTypeBase __all__ = [ "Registry", @@ -64,7 +64,7 @@ def register(self, cls: type[T]) -> None: __pipeline_registry: Registry[CodecPipeline] = Registry() __buffer_registry: Registry[Buffer] = Registry() __ndbuffer_registry: Registry[NDBuffer] = Registry() -__data_type_registry: Registry[DtypeBase] = Registry() +__data_type_registry: Registry[DTypeBase] = Registry() __v3_dtype_registry: Registry[ZarrDType] = Registry() __v2_dtype_registry: Registry[ZarrDType] = Registry() @@ -155,7 +155,7 @@ def register_buffer(cls: type[Buffer]) -> None: __buffer_registry.register(cls) -def register_data_type(cls: type[DtypeBase]) -> None: +def register_data_type(cls: type[DTypeBase]) -> None: __data_type_registry.register(cls) @@ -303,7 
+303,7 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: ) -def get_data_type(dtype: str) -> type[DtypeBase]: +def get_data_type(dtype: str) -> type[DTypeBase]: __data_type_registry.lazy_load() maybe_dtype_cls = __data_type_registry.get(dtype) if maybe_dtype_cls is None: @@ -311,7 +311,7 @@ def get_data_type(dtype: str) -> type[DtypeBase]: return maybe_dtype_cls -def get_data_type_from_numpy(dtype: npt.DTypeLike) -> type[DtypeBase]: +def get_data_type_from_numpy(dtype: npt.DTypeLike) -> type[DTypeBase]: np_dtype = np.dtype(dtype) __data_type_registry.lazy_load() for val in __data_type_registry.values(): From 042fac1081b561a07c39c0089945f2a723f61694 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 27 Feb 2025 18:06:14 +0100 Subject: [PATCH 006/130] get json de/serialization largely working, and start making tests pass --- src/zarr/api/asynchronous.py | 2 +- src/zarr/codecs/sharding.py | 7 +- src/zarr/core/array.py | 18 ++- src/zarr/core/common.py | 9 +- src/zarr/core/config.py | 6 +- src/zarr/core/metadata/dtype.py | 200 ++++++++++++++++++++++++-------- src/zarr/core/metadata/v3.py | 190 ++++-------------------------- src/zarr/registry.py | 56 +++++++-- tests/test_array.py | 10 +- tests/test_metadata/test_v3.py | 147 ++++++----------------- 10 files changed, 291 insertions(+), 354 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 6059893920..792e445c9d 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -982,7 +982,7 @@ async def create( if zarr_format == 2: if chunks is None: chunks = shape - dtype = parse_dtype(dtype, zarr_format) + dtype = parse_dtype(dtype, zarr_format=zarr_format) if not filters: filters = _default_filters(dtype) if not compressor: diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 42b1313fac..09ceb538d0 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -355,9 +355,10 @@ def __init__( 
object.__setattr__(self, "index_location", index_location_parsed) # Use instance-local lru_cache to avoid memory leaks - object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) - object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) - object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + # TODO: fix these when we don't get hashability errors for certain numpy dtypes + # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) + # object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) + # object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) # todo: typedict return type def __getstate__(self) -> dict[str, Any]: diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 2986b27fb0..c4da46da92 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -580,7 +580,7 @@ async def _create( """ store_path = await make_store_path(store) - dtype_parsed = parse_dtype(dtype, zarr_format) + dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) shape = parse_shapelike(shape) if chunks is not None and chunk_shape is not None: @@ -693,13 +693,23 @@ def _create_metadata_v3( category=UserWarning, stacklevel=2, ) + + # resolve the numpy dtype into zarr v3 datatype + zarr_data_type = get_data_type_from_numpy(dtype) + + if fill_value is None: + # v3 spec will not allow a null fill value + fill_value_parsed = dtype.type(zarr_data_type.default) + else: + fill_value_parsed = fill_value + chunk_grid_parsed = RegularChunkGrid(chunk_shape=chunk_shape) return ArrayV3Metadata( shape=shape, - data_type=dtype, + data_type=zarr_data_type, chunk_grid=chunk_grid_parsed, chunk_key_encoding=chunk_key_encoding_parsed, - fill_value=fill_value, + fill_value=fill_value_parsed, codecs=codecs, dimension_names=tuple(dimension_names) if dimension_names else None, attributes=attributes or {}, @@ 
-3909,7 +3919,7 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation - dtype_parsed = parse_dtype(dtype) + dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index e005cceed0..e398eff406 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -19,6 +19,7 @@ import numpy as np from zarr.core.config import config as zarr_config +from zarr.core.strings import _VLEN_STRING_DTYPE if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterator @@ -166,7 +167,13 @@ def parse_bool(data: Any) -> bool: raise ValueError(f"Expected bool, got {data} instead.") -def parse_dtype(dtype: Any) -> np.dtype[Any]: +def parse_dtype(dtype: Any, zarr_format: ZarrFormat) -> np.dtype[Any]: + if dtype is str or dtype == "str": + if zarr_format == 2: + # special case as object + return np.dtype("object") + else: + return _VLEN_STRING_DTYPE return np.dtype(dtype) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index c565cb0708..98252f572c 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -88,13 +88,17 @@ def enable_gpu(self) -> ConfigSet: "bytes": [{"id": "vlen-bytes"}], "raw": None, }, - "v3_default_filters": {"numeric": [], "string": [], "bytes": []}, + "v3_default_filters": {"boolean": [], "numeric": [], "string": [], "bytes": []}, "v3_default_serializer": { + "boolean": {"name": "bytes", "configuration": {"endian": "little"}}, "numeric": {"name": "bytes", "configuration": {"endian": "little"}}, "string": {"name": "vlen-utf8"}, "bytes": {"name": "vlen-bytes"}, }, "v3_default_compressors": { + "boolean": [ + {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, + ], "numeric": [ {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, ], diff --git 
a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 19a00343c8..8f940b0e0b 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -1,4 +1,5 @@ from abc import ABC, abstractmethod +from collections.abc import Sequence from dataclasses import dataclass from typing import Any, ClassVar, Literal, Self, TypeGuard, cast, get_args @@ -8,7 +9,7 @@ from zarr.abc.metadata import Metadata from zarr.core.common import JSON, ZarrFormat from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING -from zarr.registry import register_data_type +from zarr.registry import get_data_type_from_dict, register_data_type Endianness = Literal["little", "big", "native"] DataTypeFlavor = Literal["boolean", "numeric", "string", "bytes"] @@ -30,11 +31,11 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> Literal[">", "<", ) -def check_json_bool(data: JSON) -> TypeGuard[bool]: +def check_str(data: JSON) -> TypeGuard[bool]: return bool(isinstance(data, bool)) -def check_json_int(data: JSON) -> TypeGuard[int]: +def check_int(data: JSON) -> TypeGuard[int]: return bool(isinstance(data, int)) @@ -42,7 +43,21 @@ def check_json_float(data: JSON) -> TypeGuard[float]: if data == "NaN" or data == "Infinity" or data == "-Infinity": return True else: - return bool(isinstance(data, float)) + return bool(isinstance(data, float | int)) + + +def check_json_complex_float(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and len(data) == 2 + and check_json_float(data[0]) + and check_json_float(data[1]) + ) + + +def check_str(data: JSON) -> TypeGuard[str]: + return bool(isinstance(data, str)) def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: @@ -70,16 +85,16 @@ def float_to_json(data: float | np.floating[Any], zarr_format: ZarrFormat) -> JS raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") -def complex_to_json_v2(data: complex | np.complex_) -> JSONFloat: +def complex_to_json_v2(data: complex | np.complexfloating) -> JSONFloat: return float_to_json_v2(data) -def complex_to_json_v3(data: complex | np.complex_) -> tuple[JSONFloat, JSONFloat]: +def complex_to_json_v3(data: complex | np.complexfloating) -> tuple[JSONFloat, JSONFloat]: return float_to_json_v3(data.real), float_to_json_v3(data.imag) def complex_to_json( - data: complex | np.complex_, zarr_format: ZarrFormat + data: complex | np.complexfloating, zarr_format: ZarrFormat ) -> tuple[JSONFloat, JSONFloat] | JSONFloat: if zarr_format == 2: return complex_to_json_v2(data) @@ -88,7 +103,7 @@ def complex_to_json( raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") -def float_from_json_v2(data: JSONFloat, dtype: np.floating[Any]) -> np.float_: +def float_from_json_v2(data: JSONFloat, dtype: np.floating[Any]) -> np.floating[Any]: if data == "NaN": _data = np.nan elif data == "Infinity": @@ -113,21 +128,24 @@ def float_from_json(data: JSONFloat, dtype: Any, zarr_format: ZarrFormat) -> np. raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") -def complex_from_json_v2(data: JSONFloat, dtype: Any) -> np.complex_: +def complex_from_json_v2(data: JSONFloat, dtype: Any) -> np.complexfloating: return dtype.type(data) -def complex_from_json_v3(data: tuple[JSONFloat, JSONFloat], dtype: Any) -> np.complex_: - return dtype.type(data[0] + 1j * data[1]) +def complex_from_json_v3(data: tuple[JSONFloat, JSONFloat], dtype: Any) -> np.complexfloating: + return dtype.type(complex(*data)) def complex_from_json( data: tuple[JSONFloat, JSONFloat], dtype: Any, zarr_format: ZarrFormat -) -> np.complex_: +) -> np.complexfloating: if zarr_format == 2: return complex_from_json_v2(data, dtype) else: - return complex_from_json_v3(data, dtype) + if check_json_complex_float(data): + return complex_from_json_v3(data, dtype) + else: + raise TypeError(f"Invalid type: {data}. Expected a sequence of two numbers.") raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") @@ -203,7 +221,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> bool: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.bool_: - if check_json_bool(data): + if check_str(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") @@ -222,13 +240,13 @@ class Int8(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int8DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> int: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.int8: - if check_json_int(data): + if check_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. 
Expected an integer.") @@ -247,13 +265,13 @@ class UInt8(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt8DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> int: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.uint8: - if check_json_int(data): + if check_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -272,13 +290,13 @@ class Int16(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int16DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> int: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.int16: - if check_json_int(data): + if check_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -297,13 +315,13 @@ class UInt16(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt16DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> int: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.uint16: - if check_json_int(data): + if check_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. 
Expected an integer.") @@ -322,13 +340,13 @@ class Int32(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int32DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> int: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.int32: - if check_json_int(data): + if check_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -347,13 +365,13 @@ class UInt32(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt32DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> int: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.uint32: - if check_json_int(data): + if check_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -372,11 +390,13 @@ class Int64(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int64DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> int: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) - def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.int64: - if check_json_int(data): + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.int64: + if check_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. 
Expected an integer.") @@ -395,11 +415,13 @@ class UInt64(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt64DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> int: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) - def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.uint64: - if check_json_int(data): + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.uint64: + if check_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -418,10 +440,12 @@ class Float16(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float16DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> float: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> float: return float(data) - def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.float16: + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.float16: if check_json_float(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. 
Expected a float.") @@ -441,10 +465,12 @@ class Float32(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float32DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> float: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> float: return float(data) - def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.float32: + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.float32: if check_json_float(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected a float.") @@ -464,10 +490,12 @@ class Float64(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float64DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> float: + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> float: return float(data) - def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.float64: + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.float64: if check_json_float(data): return float_from_json(data, dtype=self.to_numpy(endianness=endianness)) raise TypeError(f"Invalid type: {data}. 
Expected a float.") @@ -487,13 +515,19 @@ class Complex64(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex64DType: return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic) -> float: - return float(data) + def to_json_value( + self, data: np.generic, zarr_format: ZarrFormat + ) -> tuple[JSONFloat, JSONFloat]: + return complex_to_json(data, zarr_format) - def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) -> np.complex64: - if check_json_float(data): - return self.to_numpy(endianness=endianness).type(data) - raise TypeError(f"Invalid type: {data}. Expected a float.") + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.complex64: + if check_json_complex_float(data): + return complex_from_json( + data, dtype=self.to_numpy(endianness=endianness), zarr_format=zarr_format + ) + raise TypeError(f"Invalid type: {data}. Expected a complex float.") register_data_type(Complex64) @@ -501,7 +535,7 @@ def from_json_value(self, data: JSON, *, endianness: Endianness | None = None) - @dataclass(frozen=True, kw_only=True) class Complex128(DTypeBase): - name = "complex64" + name = "complex128" item_size = 32 kind = "numeric" numpy_character_code = "D" @@ -510,6 +544,20 @@ class Complex128(DTypeBase): def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex128DType: return super().to_numpy(endianness=endianness) + def to_json_value( + self, data: np.generic, zarr_format: ZarrFormat + ) -> tuple[JSONFloat, JSONFloat]: + return complex_to_json(data, zarr_format) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.complex128: + if check_json_complex_float(data): + return complex_from_json( + data, dtype=self.to_numpy(endianness=endianness), zarr_format=zarr_format + ) + raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") + register_data_type(Complex128) @@ -536,6 +584,21 @@ def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.byte endianness_code = endianness_to_numpy_str(endianness) return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) + def to_json_value( + self, data: np.generic, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> str: + return data.tobytes().decode("ascii") + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.bytes_: + if check_str(data): + return self.to_numpy(endianness=endianness).type(data.encode("ascii")) + raise TypeError(f"Invalid type: {data}. Expected a string.") + + +register_data_type(StaticByteString) + @dataclass(frozen=True, kw_only=True) class StaticRawBytes(DTypeBase, Flexible): @@ -559,8 +622,17 @@ def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.void endianness_code = endianness_to_numpy_str(endianness) return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> tuple[int, ...]: + return tuple(*data.tobytes()) -register_data_type(StaticByteString) + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.void: + # todo: check that this is well-formed + return self.to_numpy(endianness=endianness).type(bytes(data)) + + +register_data_type(StaticRawBytes) if _NUMPY_SUPPORTS_VLEN_STRING: @@ -583,6 +655,14 @@ def to_numpy( endianness_code = endianness_to_numpy_str(endianness) return np.dtype(endianness_code + self.numpy_character_code) + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: + return str(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> "np.dtypes.StringDType": + return 
self.to_numpy(endianness=endianness).type(data) + else: @dataclass(frozen=True, kw_only=True) @@ -602,6 +682,14 @@ def to_numpy( endianness_code = endianness_to_numpy_str(endianness) return np.dtype(endianness_code + self.numpy_character_code) + def to_json_value(self, data, *, zarr_format: ZarrFormat) -> str: + return str(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.dtypes.ObjectDType: + return self.to_numpy(endianness=endianness).type(data) + register_data_type(VlenString) @@ -628,20 +716,30 @@ def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.str_ endianness_code = endianness_to_numpy_str(endianness) return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: + return str(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.str_: + if not check_str(data): + raise TypeError(f"Invalid type: {data}. 
Expected a string.") + return self.to_numpy(endianness=endianness).type(data) + register_data_type(StaticUnicodeString) -def resolve_dtype(dtype: npt.DTypeLike | DTypeBase) -> DTypeBase: +def resolve_dtype(dtype: npt.DTypeLike | DTypeBase | dict[str, JSON]) -> DTypeBase: from zarr.registry import get_data_type_from_numpy if isinstance(dtype, DTypeBase): return dtype - cls = get_data_type_from_numpy(dtype) - return cls.from_numpy(dtype) - + elif isinstance(dtype, dict): + return get_data_type_from_dict(dtype) + else: + return get_data_type_from_numpy(dtype) -register_data_type(StaticRawBytes) INTEGER_DTYPE = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 FLOAT_DTYPE = Float16 | Float32 | Float64 diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 87bb001164..b117e00dd0 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -1,20 +1,9 @@ from __future__ import annotations -import warnings -from typing import TYPE_CHECKING, TypedDict, overload +from typing import TYPE_CHECKING, TypedDict from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.metadata.dtype import ( - COMPLEX_DTYPE, - FLOAT_DTYPE, - INTEGER_DTYPE, - STRING_DTYPE, - Bool, - DTypeBase, - StaticRawBytes, - resolve_dtype, -) if TYPE_CHECKING: from collections.abc import Callable @@ -23,16 +12,18 @@ from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import JSON, ChunkCoords + from zarr.core.metadata.dtype import ( + DTypeBase, + ) import json -from collections.abc import Iterable, Sequence +from collections.abc import Iterable from dataclasses import dataclass, field, replace from enum import Enum from typing import Any, Literal import numcodecs.abc import numpy as np -import numpy.typing as npt from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.core.array_spec import 
ArrayConfig, ArraySpec @@ -48,7 +39,7 @@ from zarr.core.config import config from zarr.core.metadata.common import parse_attributes from zarr.errors import MetadataValidationError, NodeTypeValidationError -from zarr.registry import get_codec_class +from zarr.registry import get_codec_class, get_data_type_by_name, get_data_type_from_dict DEFAULT_DTYPE = "float64" @@ -259,7 +250,7 @@ def __init__( self, *, shape: Iterable[int], - data_type: npt.DTypeLike | DTypeBase, + data_type: DTypeBase, chunk_grid: dict[str, JSON] | ChunkGrid, chunk_key_encoding: ChunkKeyEncodingLike, fill_value: object, @@ -272,12 +263,12 @@ def __init__( Because the class is a frozen dataclass, we set attributes using object.__setattr__ """ shape_parsed = parse_shapelike(shape) - data_type_parsed = resolve_dtype(data_type) + data_type_parsed = data_type chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) # we pass a string here rather than an enum to make mypy happy - fill_value_parsed = parse_fill_value(fill_value, data_type_parsed) + fill_value_parsed = data_type_parsed.to_numpy().type(fill_value) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) storage_transformers_parsed = parse_storage_transformers(storage_transformers) @@ -392,7 +383,8 @@ def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: return self.chunk_key_encoding.encode_chunk_key(chunk_coords) def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: - d = _replace_special_floats(self.to_dict()) + d = self.to_dict() + # d = _replace_special_floats(self.to_dict()) return {ZARR_JSON: prototype.buffer.from_bytes(json.dumps(d, cls=V3JsonEncoder).encode())} @classmethod @@ -405,8 +397,13 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: # check that the node_type attribute is correct _ = 
parse_node_type_array(_data.pop("node_type")) - # check that the data_type attribute is valid - data_type = DataType.parse(_data.pop("data_type")) + data_type_json = _data.pop("data_type") + if isinstance(data_type_json, str): + # check that the data_type attribute is valid + data_type = get_data_type_by_name(data_type_json) + + else: + data_type = get_data_type_from_dict(data_type_json) # dimension_names key is optional, normalize missing to `None` _data["dimension_names"] = _data.pop("dimension_names", None) @@ -416,7 +413,9 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: def to_dict(self) -> dict[str, JSON]: out_dict = super().to_dict() - + out_dict["fill_value"] = self.data_type.to_json_value( + self.fill_value, zarr_format=self.zarr_format + ) if not isinstance(out_dict, dict): raise TypeError(f"Expected dict. Got {type(out_dict)}.") @@ -424,6 +423,9 @@ def to_dict(self) -> dict[str, JSON]: # the metadata document if out_dict["dimension_names"] is None: out_dict.pop("dimension_names") + # if data_type has no configuration, we just serialize the name + if "configuration" not in out_dict["data_type"]: + out_dict["data_type"] = out_dict["data_type"]["name"] return out_dict def update_shape(self, shape: ChunkCoords) -> Self: @@ -431,147 +433,3 @@ def update_shape(self, shape: ChunkCoords) -> Self: def update_attributes(self, attributes: dict[str, JSON]) -> Self: return replace(self, attributes=attributes) - - -# enum Literals can't be used in typing, so we have to restate all of the V3 dtypes as types -# https://github.com/python/typing/issues/781 - -BOOL = np.bool_ -INTEGER = np.int8 | np.int16 | np.int32 | np.int64 | np.uint8 | np.uint16 | np.uint32 | np.uint64 -FLOAT = np.float16 | np.float32 | np.float64 -COMPLEX = np.complex64 | np.complex128 - -STRING = np.str_ -BYTES = np.bytes_ - - -@overload -def parse_fill_value( - fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, - dtype: Bool, -) -> BOOL: ... 
- - -@overload -def parse_fill_value( - fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, - dtype: INTEGER_DTYPE, -) -> INTEGER: ... - - -@overload -def parse_fill_value( - fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, - dtype: FLOAT_DTYPE, -) -> FLOAT: ... - - -@overload -def parse_fill_value( - fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, - dtype: COMPLEX_DTYPE, -) -> COMPLEX: ... - - -@overload -def parse_fill_value( - fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, - dtype: STRING_DTYPE, -) -> STRING: ... - - -@overload -def parse_fill_value( - fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool, - dtype: StaticRawBytes, -) -> BYTES: ... - - -def parse_fill_value( - fill_value: Any, - dtype: DTypeBase, -) -> np.generic: - """ - Parse `fill_value`, a potential fill value, into an instance of `dtype`, a data type. - If `fill_value` is `None`, then this function will return the result of casting the value 0 - to the provided data type. Otherwise, `fill_value` will be cast to the provided data type. - - Note that some numpy dtypes use very permissive casting rules. For example, - `np.bool_({'not remotely a bool'})` returns `True`. Thus this function should not be used for - validating that the provided fill value is a valid instance of the data type. - - Parameters - ---------- - fill_value : Any - A potential fill value. - dtype : DTypeBase - A valid Zarr format 3 DataType. 
- - Returns - ------- - A scalar instance of `dtype` - """ - if fill_value is None: - raise ValueError("Fill value cannot be None") - - if dtype.kind == "string": - return np.str_(fill_value) - if dtype.kind == "bytes": - return np.bytes_(fill_value) - - # the rest are numeric types - np_dtype = dtype.to_numpy() - - if isinstance(fill_value, Sequence) and not isinstance(fill_value, str): - if isindata_type in (DataType.complex64, DataType.complex128): - if len(fill_value) == 2: - decoded_fill_value = tuple( - SPECIAL_FLOATS_ENCODED.get(value, value) for value in fill_value - ) - # complex datatypes serialize to JSON arrays with two elements - return np_dtype.type(complex(*decoded_fill_value)) - else: - msg = ( - f"Got an invalid fill value for complex data type {data_type.value}." - f"Expected a sequence with 2 elements, but {fill_value!r} has " - f"length {len(fill_value)}." - ) - raise ValueError(msg) - msg = f"Cannot parse non-string sequence {fill_value!r} as a scalar with type {data_type.value}." - raise TypeError(msg) - - # Cast the fill_value to the given dtype - try: - # This warning filter can be removed after Zarr supports numpy>=2.0 - # The warning is saying that the future behavior of out of bounds casting will be to raise - # an OverflowError. In the meantime, we allow overflow and catch cases where - # fill_value != casted_value below. 
- with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - casted_value = np.dtype(np_dtype).type(fill_value) - except (ValueError, OverflowError, TypeError) as e: - raise ValueError(f"fill value {fill_value!r} is not valid for dtype {data_type}") from e - # Check if the value is still representable by the dtype - if (fill_value == "NaN" and np.isnan(casted_value)) or ( - fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value) - ): - pass - elif np_dtype.kind == "f": - # float comparison is not exact, especially when dtype None: self[fully_qualified_name(cls)] = cls +@dataclass(frozen=True, kw_only=True) +class DataTypeRegistry: + contents: dict[str, type[DTypeBase]] = field(default_factory=dict, init=False) + lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) + + def lazy_load(self) -> None: + for e in self.lazy_load_list: + self.register(e.load()) + + self.lazy_load_list.clear() + + def register(self: Self, cls: type[DTypeBase], clobber: bool = False) -> None: + if cls.name in self.contents and not clobber: + raise ValueError( + f"Data type {cls.name} already registered. Use clobber=True to overwrite." 
+ ) + self.contents[cls.name] = cls + + def get(self, key: str) -> type[DTypeBase]: + return self.contents[key] + + __codec_registries: dict[str, Registry[Codec]] = defaultdict(Registry) __pipeline_registry: Registry[CodecPipeline] = Registry() __buffer_registry: Registry[Buffer] = Registry() __ndbuffer_registry: Registry[NDBuffer] = Registry() -__data_type_registry: Registry[DTypeBase] = Registry() +__data_type_registry = DataTypeRegistry() __v3_dtype_registry: Registry[ZarrDType] = Registry() __v2_dtype_registry: Registry[ZarrDType] = Registry() @@ -103,8 +126,8 @@ def _collect_entrypoints() -> list[Registry[Any]]: __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.ndbuffer")) __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="ndbuffer")) - __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr.data_type")) - __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="data_type")) + # __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr.data_type")) + # __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="data_type")) __v3_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr.v3dtype")) __v3_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="v3dtype")) @@ -303,22 +326,35 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: ) -def get_data_type(dtype: str) -> type[DTypeBase]: +def get_data_type_by_name(dtype: str, configuration: dict[str, JSON] | None = None) -> DTypeBase: __data_type_registry.lazy_load() + if configuration is None: + _configuration = {} + else: + _configuration = configuration maybe_dtype_cls = __data_type_registry.get(dtype) if maybe_dtype_cls is None: raise ValueError(f"No data type class matching name {dtype}") - return maybe_dtype_cls + return maybe_dtype_cls.from_dict(_configuration) -def get_data_type_from_numpy(dtype: npt.DTypeLike) 
-> type[DTypeBase]: +def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeBase: + __data_type_registry.lazy_load() + dtype_name = dtype["name"] + dtype_cls = __data_type_registry.get(dtype_name) + if dtype_cls is None: + raise ValueError(f"No data type class matching name {dtype_name}") + return dtype_cls.from_dict(dtype.get("configuration", {})) + + +def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeBase: np_dtype = np.dtype(dtype) __data_type_registry.lazy_load() - for val in __data_type_registry.values(): + for val in __data_type_registry.contents.values(): if val.numpy_character_code == np_dtype.char: - return val + return val.from_numpy(np_dtype) raise ValueError( - f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__data_type_registry)}." + f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__data_type_registry.contents)}." ) diff --git a/tests/test_array.py b/tests/test_array.py index 72c1bbf1b7..ce149d0f9a 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -43,7 +43,7 @@ from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv -from zarr.core.metadata.v3 import ArrayV3Metadata, DataType +from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.storage import LocalStore, MemoryStore, StorePath @@ -509,7 +509,7 @@ def test_info_v3(self, chunks: tuple[int, int], shards: tuple[int, int] | None) result = arr.info expected = ArrayInfo( _zarr_format=3, - _data_type=DataType.parse("float64"), + _data_type=arr.metadata.data_type, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, @@ -534,7 +534,7 @@ def test_info_complete(self, chunks: tuple[int, int], shards: tuple[int, int] | result = arr.info_complete() expected = ArrayInfo( _zarr_format=3, - _data_type=DataType.parse("float64"), + 
_data_type=arr.metadata.data_type, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, @@ -594,7 +594,7 @@ async def test_info_v3_async( result = arr.info expected = ArrayInfo( _zarr_format=3, - _data_type=DataType.parse("float64"), + _data_type=arr.metadata.data_type, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, @@ -621,7 +621,7 @@ async def test_info_complete_async( result = await arr.info_complete() expected = ArrayInfo( _zarr_format=3, - _data_type=DataType.parse("float64"), + _data_type=arr.metadata.data_type, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 4f6b2a5de6..74caf2ab43 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -12,14 +12,14 @@ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.group import GroupMetadata, parse_node_type +from zarr.core.metadata.dtype import complex_from_json from zarr.core.metadata.v3 import ( ArrayV3Metadata, - default_fill_value, parse_dimension_names, - parse_fill_value, parse_zarr_format, ) from zarr.errors import MetadataValidationError +from zarr.registry import get_data_type_from_numpy if TYPE_CHECKING: from collections.abc import Sequence @@ -107,90 +107,32 @@ def parse_dimension_names_valid(data: Sequence[str] | None) -> None: assert parse_dimension_names(data) == data -@pytest.mark.parametrize("dtype_str", dtypes) -def test_default_fill_value(dtype_str: str) -> None: - """ - Test that parse_fill_value(None, dtype) results in the 0 value for the given dtype. 
- """ - dtype = DataType(dtype_str) - fill_value = default_fill_value(dtype) - if dtype == DataType.string: - assert fill_value == "" - elif dtype == DataType.bytes: - assert fill_value == b"" - else: - assert fill_value == dtype.to_numpy().type(0) - - -@pytest.mark.parametrize( - ("fill_value", "dtype_str"), - [ - (True, "bool"), - (False, "bool"), - (-8, "int8"), - (0, "int16"), - (1e10, "uint64"), - (-999, "float32"), - (1e32, "float64"), - (float("NaN"), "float64"), - (np.nan, "float64"), - (np.inf, "float64"), - (-1 * np.inf, "float64"), - (0j, "complex64"), - ], -) -def test_parse_fill_value_valid(fill_value: Any, dtype_str: str) -> None: - """ - Test that parse_fill_value(fill_value, dtype) casts fill_value to the given dtype. - """ - parsed = parse_fill_value(fill_value, dtype_str) - - if np.isnan(fill_value): - assert np.isnan(parsed) - else: - assert parsed == DataType(dtype_str).to_numpy().type(fill_value) - - -@pytest.mark.parametrize("fill_value", ["not a valid value"]) -@pytest.mark.parametrize("dtype_str", [*int_dtypes, *float_dtypes, *complex_dtypes]) -def test_parse_fill_value_invalid_value(fill_value: Any, dtype_str: str) -> None: - """ - Test that parse_fill_value(fill_value, dtype) raises ValueError for invalid values. - This test excludes bool because the bool constructor takes anything. 
- """ - with pytest.raises(ValueError): - parse_fill_value(fill_value, dtype_str) - - -@pytest.mark.parametrize("fill_value", [[1.0, 0.0], [0, 1], complex(1, 1), np.complex64(0)]) +@pytest.mark.parametrize("fill_value", [[1.0, 0.0], [0, 1]]) @pytest.mark.parametrize("dtype_str", [*complex_dtypes]) -def test_parse_fill_value_complex(fill_value: Any, dtype_str: str) -> None: +def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: """ Test that parse_fill_value(fill_value, dtype) correctly handles complex values represented as length-2 sequences """ - dtype = DataType(dtype_str) - if isinstance(fill_value, list): - expected = dtype.to_numpy().type(complex(*fill_value)) - else: - expected = dtype.to_numpy().type(fill_value) - assert expected == parse_fill_value(fill_value, dtype_str) + zarr_format = 3 + dtype = get_data_type_from_numpy(dtype_str) + expected = dtype.to_numpy().type(complex(*fill_value)) + observed = dtype.from_json_value(fill_value, zarr_format=zarr_format) + assert observed == expected + assert dtype.to_json_value(observed, zarr_format=zarr_format) == tuple(fill_value) -@pytest.mark.parametrize("fill_value", [[1.0, 0.0, 3.0], [0, 1, 3], [1]]) @pytest.mark.parametrize("dtype_str", [*complex_dtypes]) -def test_parse_fill_value_complex_invalid(fill_value: Any, dtype_str: str) -> None: +@pytest.mark.parametrize("data", [[1.0, 0.0, 3.0], [0, 1, 3], [1]]) +def test_complex_to_json_invalid(data: object, dtype_str: str) -> None: """ Test that parse_fill_value(fill_value, dtype) correctly rejects sequences with length not equal to 2 """ - match = ( - f"Got an invalid fill value for complex data type {dtype_str}." - f"Expected a sequence with 2 elements, but {fill_value} has " - f"length {len(fill_value)}." - ) - with pytest.raises(ValueError, match=re.escape(match)): - parse_fill_value(fill_value=fill_value, dtype=dtype_str) + dtype_instance = get_data_type_from_numpy(dtype_str) + match = f"Invalid type: {data}. 
Expected a sequence of two numbers." + with pytest.raises(TypeError, match=re.escape(match)): + complex_from_json(data=data, dtype=dtype_instance, zarr_format=3) @pytest.mark.parametrize("fill_value", [{"foo": 10}]) @@ -200,8 +142,9 @@ def test_parse_fill_value_invalid_type(fill_value: Any, dtype_str: str) -> None: Test that parse_fill_value(fill_value, dtype) raises TypeError for invalid non-sequential types. This test excludes bool because the bool constructor takes anything. """ - with pytest.raises(ValueError, match=r"fill value .* is not valid for dtype .*"): - parse_fill_value(fill_value, dtype_str) + dtype_instance = get_data_type_from_numpy(dtype_str) + with pytest.raises(TypeError, match=f"Invalid type: {fill_value}"): + dtype_instance.from_json_value(fill_value, zarr_format=3) @pytest.mark.parametrize( @@ -220,9 +163,9 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str) This test excludes bool because the bool constructor takes anything, and complex because complex values can be created from length-2 sequences. 
""" - match = f"Cannot parse non-string sequence {fill_value} as a scalar with type {dtype_str}" - with pytest.raises(TypeError, match=re.escape(match)): - parse_fill_value(fill_value, dtype_str) + dtype_instance = get_data_type_from_numpy(dtype_str) + with pytest.raises(TypeError, match=re.escape(f"Invalid type: {fill_value}")): + dtype_instance.from_json_value(fill_value, zarr_format=3) @pytest.mark.parametrize("chunk_grid", ["regular"]) @@ -244,7 +187,7 @@ def test_metadata_to_dict( storage_transformers: tuple[dict[str, JSON]] | None, ) -> None: shape = (1, 2, 3) - data_type = DataType.uint8 + data_type_str = "uint8" if chunk_grid == "regular": cgrid = {"name": "regular", "configuration": {"chunk_shape": (1, 1, 1)}} @@ -268,7 +211,7 @@ def test_metadata_to_dict( "node_type": "array", "shape": shape, "chunk_grid": cgrid, - "data_type": data_type, + "data_type": data_type_str, "chunk_key_encoding": cke, "codecs": tuple(c.to_dict() for c in codecs), "fill_value": fill_value, @@ -312,46 +255,26 @@ def test_json_indent(indent: int): assert d == json.dumps(json.loads(d), indent=indent).encode() -# @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) -# @pytest.mark.parametrize("precision", ["ns", "D"]) -# async def test_datetime_metadata(fill_value: int, precision: str) -> None: -# metadata_dict = { -# "zarr_format": 3, -# "node_type": "array", -# "shape": (1,), -# "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, -# "data_type": f" None: +@pytest.mark.xfail(reason="Data type not supported yet") +@pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) +@pytest.mark.parametrize("precision", ["ns", "D"]) +async def test_datetime_metadata(fill_value: int, precision: str) -> None: metadata_dict = { "zarr_format": 3, "node_type": "array", "shape": (1,), "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": " Date: Thu, 27 Feb 2025 18:11:18 +0100 Subject: [PATCH 007/130] tweak json type guards --- 
src/zarr/core/metadata/dtype.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 8f940b0e0b..542cc85e5f 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -31,14 +31,15 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> Literal[">", "<", ) -def check_str(data: JSON) -> TypeGuard[bool]: +def check_json_bool(data: JSON) -> TypeGuard[bool]: return bool(isinstance(data, bool)) +def check_json_str(data: JSON) -> TypeGuard[str]: + return bool(isinstance(data, str)) -def check_int(data: JSON) -> TypeGuard[int]: +def check_json_int(data: JSON) -> TypeGuard[int]: return bool(isinstance(data, int)) - def check_json_float(data: JSON) -> TypeGuard[float]: if data == "NaN" or data == "Infinity" or data == "-Infinity": return True @@ -56,10 +57,6 @@ def check_json_complex_float(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat ) -def check_str(data: JSON) -> TypeGuard[str]: - return bool(isinstance(data, str)) - - def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: if np.isnan(data): return "NaN" @@ -221,7 +218,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> bool: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.bool_: - if check_str(data): + if check_json_bool(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") @@ -246,7 +243,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.int8: - if check_int(data): + if check_json_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. 
Expected an integer.") @@ -271,7 +268,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.uint8: - if check_int(data): + if check_json_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -296,7 +293,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.int16: - if check_int(data): + if check_json_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -321,7 +318,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.uint16: - if check_int(data): + if check_json_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -346,7 +343,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.int32: - if check_int(data): + if check_json_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -371,7 +368,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.uint32: - if check_int(data): + if check_json_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. 
Expected an integer.") @@ -396,7 +393,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.int64: - if check_int(data): + if check_json_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -421,7 +418,7 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.uint64: - if check_int(data): + if check_json_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -592,7 +589,7 @@ def to_json_value( def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.bytes_: - if check_str(data): + if check_json_bool(data): return self.to_numpy(endianness=endianness).type(data.encode("ascii")) raise TypeError(f"Invalid type: {data}. Expected a string.") @@ -722,7 +719,7 @@ def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.str_: - if not check_str(data): + if not check_json_bool(data): raise TypeError(f"Invalid type: {data}. 
Expected a string.") return self.to_numpy(endianness=endianness).type(data) From b588f7025a86b6c003887d7a538b68b5c7025a28 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 27 Feb 2025 19:55:51 +0100 Subject: [PATCH 008/130] fix dtype sizes, adjust fill value parsing in from_dict, fix tests --- src/zarr/core/metadata/dtype.py | 15 ++++++++++----- src/zarr/core/metadata/v3.py | 17 ++++++++++------- tests/test_metadata/test_v3.py | 24 ++++++++++++------------ 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 542cc85e5f..008751adc5 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -34,12 +34,15 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> Literal[">", "<", def check_json_bool(data: JSON) -> TypeGuard[bool]: return bool(isinstance(data, bool)) + def check_json_str(data: JSON) -> TypeGuard[str]: return bool(isinstance(data, str)) + def check_json_int(data: JSON) -> TypeGuard[int]: return bool(isinstance(data, int)) + def check_json_float(data: JSON) -> TypeGuard[float]: if data == "NaN" or data == "Infinity" or data == "-Infinity": return True @@ -254,7 +257,7 @@ def from_json_value( @dataclass(frozen=True, kw_only=True) class UInt8(DTypeBase): name = "uint8" - item_size = 2 + item_size = 1 kind = "numeric" numpy_character_code = "B" default = 0 @@ -488,13 +491,15 @@ def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float64 return super().to_numpy(endianness=endianness) def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> float: - return float(data) + return float_to_json(data, zarr_format) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.float64: if check_json_float(data): - return float_from_json(data, dtype=self.to_numpy(endianness=endianness)) + return float_from_json( + data, 
dtype=self.to_numpy(endianness=endianness), zarr_format=zarr_format + ) raise TypeError(f"Invalid type: {data}. Expected a float.") @@ -504,7 +509,7 @@ def from_json_value( @dataclass(frozen=True, kw_only=True) class Complex64(DTypeBase): name = "complex64" - item_size = 16 + item_size = 8 kind = "numeric" numpy_character_code = "F" default = 0.0 + 0.0j @@ -533,7 +538,7 @@ def from_json_value( @dataclass(frozen=True, kw_only=True) class Complex128(DTypeBase): name = "complex128" - item_size = 32 + item_size = 16 kind = "numeric" numpy_character_code = "D" default = 0.0 + 0.0j diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index b117e00dd0..ce1a8b77fa 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -263,28 +263,26 @@ def __init__( Because the class is a frozen dataclass, we set attributes using object.__setattr__ """ shape_parsed = parse_shapelike(shape) - data_type_parsed = data_type chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) - # we pass a string here rather than an enum to make mypy happy - fill_value_parsed = data_type_parsed.to_numpy().type(fill_value) + fill_value_parsed = data_type.to_numpy().type(fill_value) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) storage_transformers_parsed = parse_storage_transformers(storage_transformers) array_spec = ArraySpec( shape=shape_parsed, - dtype=data_type_parsed.to_numpy(), + dtype=data_type.to_numpy(), fill_value=fill_value_parsed, config=ArrayConfig.from_dict({}), # TODO: config is not needed here. prototype=default_buffer_prototype(), # TODO: prototype is not needed here. 
) codecs_parsed = tuple(c.evolve_from_array_spec(array_spec) for c in codecs_parsed_partial) - validate_codecs(codecs_parsed_partial, data_type_parsed) + validate_codecs(codecs_parsed_partial, data_type) object.__setattr__(self, "shape", shape_parsed) - object.__setattr__(self, "data_type", data_type_parsed) + object.__setattr__(self, "data_type", data_type) object.__setattr__(self, "chunk_grid", chunk_grid_parsed) object.__setattr__(self, "chunk_key_encoding", chunk_key_encoding_parsed) object.__setattr__(self, "codecs", codecs_parsed) @@ -405,11 +403,16 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: else: data_type = get_data_type_from_dict(data_type_json) + # check that the fill value is consistent with the data type + fill_value_parsed = data_type.from_json_value(_data.pop("fill_value"), zarr_format=3) + # dimension_names key is optional, normalize missing to `None` _data["dimension_names"] = _data.pop("dimension_names", None) + # attributes key is optional, normalize missing to `None` _data["attributes"] = _data.pop("attributes", None) - return cls(**_data, data_type=data_type) # type: ignore[arg-type] + + return cls(**_data, fill_value=fill_value_parsed, data_type=data_type) # type: ignore[arg-type] def to_dict(self) -> dict[str, JSON]: out_dict = super().to_dict() diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 74caf2ab43..41d8b9a4d5 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -12,7 +12,7 @@ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.group import GroupMetadata, parse_node_type -from zarr.core.metadata.dtype import complex_from_json +from zarr.core.metadata.dtype import Flexible, complex_from_json from zarr.core.metadata.v3 import ( ArrayV3Metadata, parse_dimension_names, @@ -278,7 +278,7 @@ async def test_datetime_metadata(fill_value: int, precision: str) -> None: 
@pytest.mark.parametrize( - ("data_type", "fill_value"), [("uint8", -1), ("int32", 22.5), ("float32", "foo")] + ("data_type", "fill_value"), [("uint8", {}), ("int32", [0, 1]), ("float32", "foo")] ) async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> None: metadata_dict = { @@ -288,10 +288,11 @@ async def test_invalid_fill_value_raises(data_type: str, fill_value: float) -> N "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, "data_type": data_type, "chunk_key_encoding": {"name": "default", "separator": "."}, - "codecs": (), + "codecs": ({"name": "bytes"},), "fill_value": fill_value, # this is not a valid fill value for uint8 } - with pytest.raises(ValueError, match=r"fill value .* is not valid for dtype .*"): + # multiple things can go wrong here, so we don't match on the error message. + with pytest.raises(TypeError): ArrayV3Metadata.from_dict(metadata_dict) @@ -323,13 +324,12 @@ async def test_special_float_fill_values(fill_value: str) -> None: @pytest.mark.parametrize("dtype_str", dtypes) def test_dtypes(dtype_str: str) -> None: - dt = DataType(dtype_str) + dt = get_data_type_from_numpy(dtype_str) np_dtype = dt.to_numpy() - if dtype_str not in vlen_dtypes: - # we can round trip "normal" dtypes - assert dt == DataType.from_numpy(np_dtype) - assert dt.byte_count == np_dtype.itemsize - assert dt.has_endianness == (dt.byte_count > 1) + + if not isinstance(dt, Flexible): + assert dt.item_size == np_dtype.itemsize else: - # return type for vlen types may vary depending on numpy version - assert dt.byte_count is None + assert dt.length == np_dtype.itemsize + + assert dt.numpy_character_code == np_dtype.char From 4ed41c6a9b5731d336239325ebcec3321c4ff585 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 2 Mar 2025 12:54:57 +0100 Subject: [PATCH 009/130] mid-refactor commit --- src/zarr/core/_info.py | 4 +- src/zarr/core/array.py | 6 +- src/zarr/core/metadata/dtype.py | 345 ++++++-------------------------- 
src/zarr/core/metadata/v3.py | 8 +- src/zarr/registry.py | 18 +- 5 files changed, 80 insertions(+), 301 deletions(-) diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index 2ede547600..6b594583e2 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -7,7 +7,7 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.core.common import ZarrFormat -from zarr.core.metadata.dtype import DTypeBase +from zarr.core.metadata.dtype import DTypeWrapper # from zarr.core.metadata.v3 import DataType @@ -80,7 +80,7 @@ class ArrayInfo: _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat - _data_type: np.dtype[Any] | DTypeBase + _data_type: np.dtype[Any] | DTypeWrapper _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] | None = None diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index c4da46da92..975408a01d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -98,7 +98,7 @@ ArrayV3MetadataDict, T_ArrayMetadata, ) -from zarr.core.metadata.dtype import DTypeBase +from zarr.core.metadata.dtype import DTypeWrapper from zarr.core.metadata.v2 import ( _default_compressor, _default_filters, @@ -699,7 +699,7 @@ def _create_metadata_v3( if fill_value is None: # v3 spec will not allow a null fill value - fill_value_parsed = dtype.type(zarr_data_type.default) + fill_value_parsed = dtype.type(zarr_data_type._default_value) else: fill_value_parsed = fill_value @@ -1694,7 +1694,7 @@ async def info_complete(self) -> Any: def _info( self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None ) -> Any: - _data_type: np.dtype[Any] | DTypeBase + _data_type: np.dtype[Any] | DTypeWrapper if isinstance(self.metadata, ArrayV2Metadata): _data_type = self.metadata.dtype else: diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 008751adc5..106b3088d0 100644 --- a/src/zarr/core/metadata/dtype.py +++ 
b/src/zarr/core/metadata/dtype.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from collections.abc import Sequence from dataclasses import dataclass -from typing import Any, ClassVar, Literal, Self, TypeGuard, cast, get_args +from typing import Any, ClassVar, Generic, Literal, Self, TypeGuard, TypeVar, cast, get_args import numpy as np import numpy.typing as npt @@ -148,44 +148,28 @@ def complex_from_json( raise TypeError(f"Invalid type: {data}. Expected a sequence of two numbers.") raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") +TDType = TypeVar("TDType", bound=np.dtype[Any]) +TScalar = TypeVar("TScalar", bound=np.generic) @dataclass(frozen=True, kw_only=True) class Flexible: length: int - -class DTypeBase(ABC, Metadata): +class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): name: ClassVar[str] - numpy_character_code: ClassVar[str] - item_size: ClassVar[int | None] + dtype_cls: ClassVar[type[TDType]] # this class will create a numpy dtype kind: ClassVar[DataTypeFlavor] - default: object - - def __init_subclass__(cls, **kwargs: object) -> None: - required_attrs = ["name", "numpy_character_code", "item_size", "kind", "default"] - for attr in required_attrs: - if not hasattr(cls, attr): - raise ValueError(f"{attr} is a required attribute for a Zarr dtype.") - - return super().__init_subclass__(**kwargs) + _default_value: object def to_dict(self) -> dict[str, JSON]: return {"name": self.name} - @classmethod - def from_numpy(cls, dtype: npt.DTypeLike) -> Self: - if np.dtype(dtype).char != cls.numpy_character_code: - raise ValueError( - f"Invalid dtype {dtype}. Expected dtype with character code == {cls.numpy_character_code}." 
- ) - return cls() - - def default_value(self: Self, *, endianness: Endianness | None = None) -> np.generic: - return cast(np.generic, self.to_numpy(endianness=endianness).type(self.default)) + def default_value(self: Self, *, endianness: Endianness | None = None) -> TScalar: + return cast(np.generic, self.to_numpy(endianness=endianness).type(self._default_value)) - def to_numpy(self: Self, *, endianness: Endianness | None = None) -> np.dtype[Any]: + def to_numpy(self: Self, *, endianness: Endianness | None = None) -> TDType: endian_str = endianness_to_numpy_str(endianness) - return np.dtype(endian_str + self.numpy_character_code) + return self.dtype_cls().newbyteorder(endian_str) @abstractmethod def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> JSON: @@ -197,7 +181,7 @@ def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> JSON: @abstractmethod def from_json_value( self: Self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.generic: + ) -> TScalar: """ Read a JSON-serializable value as a numpy scalar """ @@ -205,16 +189,11 @@ def from_json_value( @dataclass(frozen=True, kw_only=True) -class Bool(DTypeBase): +class Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): name = "bool" - item_size = 1 kind = "boolean" - numpy_character_code = "?" 
default = False - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.BoolDType: - return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> bool: return bool(data) @@ -228,295 +207,128 @@ def from_json_value( register_data_type(Bool) - -@dataclass(frozen=True, kw_only=True) -class Int8(DTypeBase): - name = "int8" - item_size = 1 +class BaseInt(DTypeWrapper[TDType, TScalar]): kind = "numeric" - numpy_character_code = "b" default = 0 - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int8DType: - return super().to_numpy(endianness=endianness) - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.int8: + ) -> TScalar: if check_json_int(data): return self.to_numpy(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") +@dataclass(frozen=True, kw_only=True) +class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): + name = "int8" + + register_data_type(Int8) @dataclass(frozen=True, kw_only=True) -class UInt8(DTypeBase): +class UInt8(DTypeWrapper[np.dtypes.UInt8DType, np.uint8]): name = "uint8" - item_size = 1 - kind = "numeric" - numpy_character_code = "B" - default = 0 - - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt8DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.uint8: - if check_json_int(data): - return self.to_numpy(endianness=endianness).type(data) - raise TypeError(f"Invalid type: {data}. 
Expected an integer.") register_data_type(UInt8) @dataclass(frozen=True, kw_only=True) -class Int16(DTypeBase): +class Int16(DTypeWrapper[np.dtypes.Int16DType, np.int16]): name = "int16" - item_size = 2 - kind = "numeric" - numpy_character_code = "h" - default = 0 - - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int16DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.int16: - if check_json_int(data): - return self.to_numpy(endianness=endianness).type(data) - raise TypeError(f"Invalid type: {data}. Expected an integer.") register_data_type(Int16) @dataclass(frozen=True, kw_only=True) -class UInt16(DTypeBase): +class UInt16(DTypeWrapper[np.dtypes.UInt16DType, np.uint16]): name = "uint16" - item_size = 2 - kind = "numeric" - numpy_character_code = "H" - default = 0 - - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt16DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.uint16: - if check_json_int(data): - return self.to_numpy(endianness=endianness).type(data) - raise TypeError(f"Invalid type: {data}. 
Expected an integer.") - register_data_type(UInt16) @dataclass(frozen=True, kw_only=True) -class Int32(DTypeBase): +class Int32(DTypeWrapper[np.dtypes.Int32DType, np.int32]): name = "int32" - item_size = 4 - kind = "numeric" - numpy_character_code = "i" - default = 0 - - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int32DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.int32: - if check_json_int(data): - return self.to_numpy(endianness=endianness).type(data) - raise TypeError(f"Invalid type: {data}. Expected an integer.") register_data_type(Int32) @dataclass(frozen=True, kw_only=True) -class UInt32(DTypeBase): +class UInt32(DTypeWrapper[np.dtypes.UInt32DType, np.uint32]): name = "uint32" - item_size = 4 - kind = "numeric" - numpy_character_code = "I" - default = 0 - - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt32DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.uint32: - if check_json_int(data): - return self.to_numpy(endianness=endianness).type(data) - raise TypeError(f"Invalid type: {data}. 
Expected an integer.") - register_data_type(UInt32) @dataclass(frozen=True, kw_only=True) -class Int64(DTypeBase): +class Int64(DTypeWrapper[np.dtypes.Int64DType, np.int64]): name = "int64" - item_size = 8 - kind = "numeric" - numpy_character_code = "l" - default = 0 - - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Int64DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.int64: - if check_json_int(data): - return self.to_numpy(endianness=endianness).type(data) - raise TypeError(f"Invalid type: {data}. Expected an integer.") register_data_type(Int64) @dataclass(frozen=True, kw_only=True) -class UInt64(DTypeBase): +class UInt64(DTypeWrapper[np.dtypes.UInt64DType, np.uint64]): name = "uint64" - item_size = 8 - kind = "numeric" - numpy_character_code = "L" - default = 0 - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.UInt64DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.uint64: - if check_json_int(data): - return self.to_numpy(endianness=endianness).type(data) - raise TypeError(f"Invalid type: {data}. 
Expected an integer.") register_data_type(UInt64) -@dataclass(frozen=True, kw_only=True) -class Float16(DTypeBase): - name = "float16" - item_size = 2 +class FloatBase(DTypeWrapper[TDType, TScalar]): kind = "numeric" - numpy_character_code = "e" default = 0.0 - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float16DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> float: - return float(data) + def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> JSONFloat: + return float_to_json(data, zarr_format) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.float16: + ) -> TScalar: if check_json_float(data): - return self.to_numpy(endianness=endianness).type(data) + return self.to_numpy(endianness=endianness).type(float_from_json)(data, zarr_format) raise TypeError(f"Invalid type: {data}. Expected a float.") +@dataclass(frozen=True, kw_only=True) +class Float16(DTypeWrapper[np.dtypes.Float16DType, np.float16]): + name = "float16" + register_data_type(Float16) @dataclass(frozen=True, kw_only=True) -class Float32(DTypeBase): +class Float32(DTypeWrapper[np.dtypes.Float32DType, np.float32]): name = "float32" - item_size = 4 - kind = "numeric" - numpy_character_code = "f" - default = 0.0 - - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float32DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> float: - return float(data) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.float32: - if check_json_float(data): - return self.to_numpy(endianness=endianness).type(data) - raise TypeError(f"Invalid type: {data}. 
Expected a float.") - + register_data_type(Float32) @dataclass(frozen=True, kw_only=True) -class Float64(DTypeBase): +class Float64(DTypeWrapper[np.dtypes.Float64DType, np.float64]): name = "float64" - item_size = 8 - kind = "numeric" - numpy_character_code = "d" - default = 0.0 - - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Float64DType: - return super().to_numpy(endianness=endianness) - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> float: - return float_to_json(data, zarr_format) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.float64: - if check_json_float(data): - return float_from_json( - data, dtype=self.to_numpy(endianness=endianness), zarr_format=zarr_format - ) - raise TypeError(f"Invalid type: {data}. Expected a float.") register_data_type(Float64) @dataclass(frozen=True, kw_only=True) -class Complex64(DTypeBase): +class Complex64(DTypeWrapper[np.dtypes.Complex64DType, np.complex64]): name = "complex64" - item_size = 8 kind = "numeric" - numpy_character_code = "F" default = 0.0 + 0.0j - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex64DType: - return super().to_numpy(endianness=endianness) - def to_json_value( self, data: np.generic, zarr_format: ZarrFormat ) -> tuple[JSONFloat, JSONFloat]: @@ -536,16 +348,12 @@ def from_json_value( @dataclass(frozen=True, kw_only=True) -class Complex128(DTypeBase): +class Complex128(DTypeWrapper[np.dtypes.Complex128DType, np.complex128]): name = "complex128" - item_size = 16 kind = "numeric" - numpy_character_code = "D" + dtype_cls = np.dtypes.Complex128DType default = 0.0 + 0.0j - def to_numpy(self, *, endianness: Endianness | None = None) -> np.dtypes.Complex128DType: - return super().to_numpy(endianness=endianness) - def to_json_value( self, data: np.generic, zarr_format: ZarrFormat ) -> tuple[JSONFloat, JSONFloat]: @@ -565,26 +373,17 @@ def from_json_value( 
@dataclass(frozen=True, kw_only=True) -class StaticByteString(DTypeBase, Flexible): +class StaticByteString(DTypeWrapper[np.dtypes.BytesDType, np.bytes_], Flexible): name = "numpy/static_byte_string" kind = "string" - numpy_character_code = "S" - item_size = 1 default = b"" - @classmethod - def from_numpy(cls: type[Self], dtype: npt.DTypeLike) -> Self: - dtype = np.dtype(dtype) - if dtype.kind != cls.numpy_character_code: - raise ValueError(f"Invalid dtype {dtype}. Expected a string dtype.") - return cls(length=dtype.itemsize) - def to_dict(self) -> dict[str, JSON]: return {"name": self.name, "configuration": {"capacity": self.length}} - def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.bytes_]: + def to_numpy(self, endianness: Endianness | None = "native") -> np.dtypes.BytesDType: endianness_code = endianness_to_numpy_str(endianness) - return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) + return self.dtype_cls(self.length).newbyteorder(endianness_code) def to_json_value( self, data: np.generic, *, zarr_format: ZarrFormat, endianness: Endianness | None = None @@ -603,26 +402,20 @@ def from_json_value( @dataclass(frozen=True, kw_only=True) -class StaticRawBytes(DTypeBase, Flexible): +class StaticRawBytes(DTypeWrapper[np.dtypes.VoidDType, np.void], Flexible): name = "r*" kind = "bytes" - numpy_character_code = "V" - item_size = 1 default = b"" - @classmethod - def from_numpy(cls: type[Self], dtype: npt.DTypeLike) -> Self: - dtype = np.dtype(dtype) - if dtype.kind != "V": - raise ValueError(f"Invalid dtype {dtype}. 
Expected a bytes dtype.") - return cls(length=dtype.itemsize) def to_dict(self) -> dict[str, JSON]: return {"name": f"r{self.length * 8}"} - def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.void]: + def to_numpy(self, endianness: Endianness | None = "native") -> np.dtypes.VoidDType: + # this needs to be overridden because numpy does not allow creating a void type + # by invoking np.dtypes.VoidDType directly endianness_code = endianness_to_numpy_str(endianness) - return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) + return np.dtype(f'{endianness_code}V{self.length}') def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> tuple[int, ...]: return tuple(*data.tobytes()) @@ -639,13 +432,10 @@ def from_json_value( if _NUMPY_SUPPORTS_VLEN_STRING: @dataclass(frozen=True, kw_only=True) - class VlenString(DTypeBase): + class VlenString(DTypeWrapper[np.dtypes.StringDType, str]): name = "numpy/vlen_string" kind = "string" - numpy_character_code = "T" - # this uses UTF-8, so the encoding of a code point varies between - # 1 and 4 bytes - item_size = None + dtype_cls = np.dtypes.StringDType default = "" def to_dict(self) -> dict[str, JSON]: @@ -653,7 +443,7 @@ def to_dict(self) -> dict[str, JSON]: def to_numpy( self, endianness: Endianness | None = "native" - ) -> np.dtype[np.dtypes.StringDType]: + ) -> np.dtypes.StringDType: endianness_code = endianness_to_numpy_str(endianness) return np.dtype(endianness_code + self.numpy_character_code) @@ -662,34 +452,32 @@ def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> "np.dtypes.StringDType": + ) -> str: return self.to_numpy(endianness=endianness).type(data) else: @dataclass(frozen=True, kw_only=True) - class VlenString(DTypeBase): + class VlenString(DTypeWrapper[np.dtypes.ObjectDType, str]): name = "numpy/vlen_string" kind = 
"string" - numpy_character_code = "O" - item_size = None + dtype_cls = np.dtypes.ObjectDType default = "" def to_dict(self) -> dict[str, JSON]: return {"name": self.name} def to_numpy( - self, endianness: Endianness | None = "native" + self, endianness: Endianness | None = None ) -> np.dtype[np.dtypes.ObjectDType]: - endianness_code = endianness_to_numpy_str(endianness) - return np.dtype(endianness_code + self.numpy_character_code) + return super().to_numpy(endianness=endianness) def to_json_value(self, data, *, zarr_format: ZarrFormat) -> str: return str(data) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.dtypes.ObjectDType: + ) -> str: return self.to_numpy(endianness=endianness).type(data) @@ -697,24 +485,15 @@ def from_json_value( @dataclass(frozen=True, kw_only=True) -class StaticUnicodeString(DTypeBase, Flexible): +class StaticUnicodeString(DTypeWrapper[np.dtypes.StrDType, np.str_], Flexible): name = "numpy/static_unicode_string" kind = "string" - numpy_character_code = "U" - item_size = 4 default = "" - @classmethod - def from_numpy(cls: type[Self], dtype: npt.DTypeLike) -> Self: - dtype = np.dtype(dtype) - if dtype.kind != "U": - raise ValueError(f"Invalid dtype {dtype}. 
Expected a string dtype.") - return cls(length=dtype.itemsize) - def to_dict(self) -> dict[str, JSON]: return {"name": self.name, "configuration": {"capacity": self.length}} - def to_numpy(self, endianness: Endianness | None = "native") -> np.dtype[np.str_]: + def to_numpy(self, endianness: Endianness | None = "native") -> np.dtypes.StrDType: endianness_code = endianness_to_numpy_str(endianness) return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) @@ -732,10 +511,10 @@ def from_json_value( register_data_type(StaticUnicodeString) -def resolve_dtype(dtype: npt.DTypeLike | DTypeBase | dict[str, JSON]) -> DTypeBase: +def resolve_dtype(dtype: npt.DTypeLike | DTypeWrapper | dict[str, JSON]) -> DTypeWrapper: from zarr.registry import get_data_type_from_numpy - if isinstance(dtype, DTypeBase): + if isinstance(dtype, DTypeWrapper): return dtype elif isinstance(dtype, dict): return get_data_type_from_dict(dtype) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index ce1a8b77fa..6d2f8d35e7 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -13,7 +13,7 @@ from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import JSON, ChunkCoords from zarr.core.metadata.dtype import ( - DTypeBase, + DTypeWrapper, ) import json @@ -92,7 +92,7 @@ def validate_array_bytes_codec(codecs: tuple[Codec, ...]) -> ArrayBytesCodec: return abcs[0] -def validate_codecs(codecs: tuple[Codec, ...], dtype: DTypeBase) -> None: +def validate_codecs(codecs: tuple[Codec, ...], dtype: DTypeWrapper) -> None: """Check that the codecs are valid for the given dtype""" from zarr.codecs.sharding import ShardingCodec @@ -235,7 +235,7 @@ class ArrayV3MetadataDict(TypedDict): @dataclass(frozen=True, kw_only=True) class ArrayV3Metadata(Metadata): shape: ChunkCoords - data_type: DTypeBase + data_type: DTypeWrapper chunk_grid: ChunkGrid chunk_key_encoding: ChunkKeyEncoding fill_value: Any @@ -250,7 +250,7 @@ def __init__( self, *, 
shape: Iterable[int], - data_type: DTypeBase, + data_type: DTypeWrapper, chunk_grid: dict[str, JSON] | ChunkGrid, chunk_key_encoding: ChunkKeyEncodingLike, fill_value: object, diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 7ad688a61a..1b8ecc7a92 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -25,7 +25,7 @@ from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON from zarr.core.dtype import ZarrDType - from zarr.core.metadata.dtype import DTypeBase + from zarr.core.metadata.dtype import DTypeWrapper __all__ = [ "Registry", @@ -63,7 +63,7 @@ def register(self, cls: type[T]) -> None: @dataclass(frozen=True, kw_only=True) class DataTypeRegistry: - contents: dict[str, type[DTypeBase]] = field(default_factory=dict, init=False) + contents: dict[str, type[DTypeWrapper]] = field(default_factory=dict, init=False) lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) def lazy_load(self) -> None: @@ -72,14 +72,14 @@ def lazy_load(self) -> None: self.lazy_load_list.clear() - def register(self: Self, cls: type[DTypeBase], clobber: bool = False) -> None: + def register(self: Self, cls: type[DTypeWrapper], clobber: bool = False) -> None: if cls.name in self.contents and not clobber: raise ValueError( f"Data type {cls.name} already registered. Use clobber=True to overwrite." 
) self.contents[cls.name] = cls - def get(self, key: str) -> type[DTypeBase]: + def get(self, key: str) -> type[DTypeWrapper]: return self.contents[key] @@ -178,7 +178,7 @@ def register_buffer(cls: type[Buffer]) -> None: __buffer_registry.register(cls) -def register_data_type(cls: type[DTypeBase]) -> None: +def register_data_type(cls: type[DTypeWrapper]) -> None: __data_type_registry.register(cls) @@ -326,7 +326,7 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: ) -def get_data_type_by_name(dtype: str, configuration: dict[str, JSON] | None = None) -> DTypeBase: +def get_data_type_by_name(dtype: str, configuration: dict[str, JSON] | None = None) -> DTypeWrapper: __data_type_registry.lazy_load() if configuration is None: _configuration = {} @@ -338,7 +338,7 @@ def get_data_type_by_name(dtype: str, configuration: dict[str, JSON] | None = No return maybe_dtype_cls.from_dict(_configuration) -def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeBase: +def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeWrapper: __data_type_registry.lazy_load() dtype_name = dtype["name"] dtype_cls = __data_type_registry.get(dtype_name) @@ -347,12 +347,12 @@ def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeBase: return dtype_cls.from_dict(dtype.get("configuration", {})) -def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeBase: +def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper: np_dtype = np.dtype(dtype) __data_type_registry.lazy_load() for val in __data_type_registry.contents.values(): if val.numpy_character_code == np_dtype.char: - return val.from_numpy(np_dtype) + return val.from_str(np_dtype) raise ValueError( f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__data_type_registry.contents)}." 
) From 1b2c773fca1f92caef8b33f41865a31df4e8fa26 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 2 Mar 2025 19:44:43 +0100 Subject: [PATCH 010/130] working form for dtype classes --- src/zarr/core/array.py | 2 +- src/zarr/core/metadata/dtype.py | 366 ++++++++++++++++++-------------- src/zarr/core/metadata/v3.py | 8 +- src/zarr/registry.py | 14 +- tests/test_codecs/test_vlen.py | 8 +- tests/test_metadata/test_v3.py | 23 +- 6 files changed, 234 insertions(+), 187 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 975408a01d..7edd467a54 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -699,7 +699,7 @@ def _create_metadata_v3( if fill_value is None: # v3 spec will not allow a null fill value - fill_value_parsed = dtype.type(zarr_data_type._default_value) + fill_value_parsed = zarr_data_type.default_value else: fill_value_parsed = fill_value diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 106b3088d0..1b57831943 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -5,11 +5,12 @@ import numpy as np import numpy.typing as npt +from typing_extensions import get_original_bases from zarr.abc.metadata import Metadata from zarr.core.common import JSON, ZarrFormat from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING -from zarr.registry import get_data_type_from_dict, register_data_type +from zarr.registry import register_data_type Endianness = Literal["little", "big", "native"] DataTypeFlavor = Literal["boolean", "numeric", "string", "bytes"] @@ -32,34 +33,80 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> Literal[">", "<", def check_json_bool(data: JSON) -> TypeGuard[bool]: + """ + Check if a JSON value represents a boolean. + """ return bool(isinstance(data, bool)) def check_json_str(data: JSON) -> TypeGuard[str]: + """ + Check if a JSON value represents a string. 
+ """ return bool(isinstance(data, str)) def check_json_int(data: JSON) -> TypeGuard[int]: + """ + Check if a JSON value represents an integer. + """ return bool(isinstance(data, int)) -def check_json_float(data: JSON) -> TypeGuard[float]: +def check_json_float_v2(data: JSON) -> TypeGuard[float]: if data == "NaN" or data == "Infinity" or data == "-Infinity": return True else: return bool(isinstance(data, float | int)) -def check_json_complex_float(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: +def check_json_float_v3(data: JSON) -> TypeGuard[float]: + # TODO: handle the special JSON serialization of different NaN values + return check_json_float_v2(data) + + +def check_json_float(data: JSON, zarr_format: ZarrFormat) -> TypeGuard[float]: + if zarr_format == 2: + return check_json_float_v2(data) + else: + return check_json_float_v3(data) + + +def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + """ + Check if a JSON value represents a complex float, as per the zarr v3 spec + """ return ( not isinstance(data, str) and isinstance(data, Sequence) and len(data) == 2 - and check_json_float(data[0]) - and check_json_float(data[1]) + and check_json_float_v3(data[0]) + and check_json_float_v3(data[1]) ) +def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + """ + Check if a JSON value represents a complex float, as per the behavior of zarr-python 2.x + """ + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and len(data) == 2 + and check_json_float_v2(data[0]) + and check_json_float_v2(data[1]) + ) + + +def check_json_complex_float( + data: JSON, zarr_format: ZarrFormat +) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + if zarr_format == 2: + return check_json_complex_float_v2(data) + else: + return check_json_complex_float_v3(data) + + def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: if np.isnan(data): return "NaN" @@ -103,29 +150,28 @@ def complex_to_json( 
raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") -def float_from_json_v2(data: JSONFloat, dtype: np.floating[Any]) -> np.floating[Any]: - if data == "NaN": - _data = np.nan - elif data == "Infinity": - _data = np.inf - elif data == "-Infinity": - _data = -np.inf - else: - _data = data - return dtype.type(_data) +def float_from_json_v2(data: JSONFloat) -> float: + match data: + case "NaN": + return float("nan") + case "Infinity": + return float("inf") + case "-Infinity": + return float("-inf") + case _: + return float(data) -def float_from_json_v3(data: JSONFloat, dtype: Any) -> np.floating[Any]: +def float_from_json_v3(data: JSONFloat) -> float: # todo: support the v3-specific NaN handling - return float_from_json_v2(data, dtype) + return float_from_json_v2(data) -def float_from_json(data: JSONFloat, dtype: Any, zarr_format: ZarrFormat) -> np.floating[Any]: +def float_from_json(data: JSONFloat, zarr_format: ZarrFormat) -> float: if zarr_format == 2: - return float_from_json_v2(data, dtype) + return float_from_json_v2(data) else: - return float_from_json_v3(data, dtype) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + return float_from_json_v3(data) def complex_from_json_v2(data: JSONFloat, dtype: Any) -> np.complexfloating: @@ -142,32 +188,42 @@ def complex_from_json( if zarr_format == 2: return complex_from_json_v2(data, dtype) else: - if check_json_complex_float(data): + if check_json_complex_float_v3(data): return complex_from_json_v3(data, dtype) else: raise TypeError(f"Invalid type: {data}. Expected a sequence of two numbers.") raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") + TDType = TypeVar("TDType", bound=np.dtype[Any]) TScalar = TypeVar("TScalar", bound=np.generic) -@dataclass(frozen=True, kw_only=True) -class Flexible: - length: int class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): name: ClassVar[str] - dtype_cls: ClassVar[type[TDType]] # this class will create a numpy dtype + dtype_cls: ClassVar[type[TDType]] # this class will create a numpy dtype kind: ClassVar[DataTypeFlavor] - _default_value: object + default_value: TScalar + + def __init_subclass__(cls) -> None: + # Subclasses will bind the first generic type parameter to an attribute of the class + # TODO: wrap this in some *very informative* error handling + generic_args = get_args(get_original_bases(cls)[0]) + cls.dtype_cls = generic_args[0] + return super().__init_subclass__() def to_dict(self) -> dict[str, JSON]: return {"name": self.name} - def default_value(self: Self, *, endianness: Endianness | None = None) -> TScalar: - return cast(np.generic, self.to_numpy(endianness=endianness).type(self._default_value)) + def cast_value(self: Self, value: object, *, endianness: Endianness | None = None) -> TScalar: + return cast(np.generic, self.to_dtype(endianness=endianness).type(value)) - def to_numpy(self: Self, *, endianness: Endianness | None = None) -> TDType: + @classmethod + @abstractmethod + def from_dtype(cls: type[Self], dtype: TDType) -> Self: + raise NotImplementedError + + def to_dtype(self: Self, *, endianness: Endianness | None = None) -> TDType: endian_str = endianness_to_numpy_str(endianness) return self.dtype_cls().newbyteorder(endian_str) @@ -192,7 +248,11 @@ def from_json_value( class Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): name = "bool" kind = "boolean" - default = False + default_value = np.False_ + + @classmethod + def from_dtype(cls, dtype: np.dtypes.BoolDType) -> Self: + return cls() def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> bool: return bool(data) @@ -201,15 +261,16 @@ def 
from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.bool_: if check_json_bool(data): - return self.to_numpy(endianness=endianness).type(data) + return self.to_dtype(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") -register_data_type(Bool) - -class BaseInt(DTypeWrapper[TDType, TScalar]): +class IntWrapperBase(DTypeWrapper[TDType, TScalar]): kind = "numeric" - default = 0 + + @classmethod + def from_dtype(cls, dtype: TDType) -> Self: + return cls() def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) @@ -218,76 +279,64 @@ def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> TScalar: if check_json_int(data): - return self.to_numpy(endianness=endianness).type(data) + return self.to_dtype(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @dataclass(frozen=True, kw_only=True) -class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): +class Int8(IntWrapperBase[np.dtypes.Int8DType, np.int8]): name = "int8" - - -register_data_type(Int8) + default_value = np.int8(0) @dataclass(frozen=True, kw_only=True) -class UInt8(DTypeWrapper[np.dtypes.UInt8DType, np.uint8]): +class UInt8(IntWrapperBase[np.dtypes.UInt8DType, np.uint8]): name = "uint8" - - -register_data_type(UInt8) + default_value = np.uint8(0) @dataclass(frozen=True, kw_only=True) -class Int16(DTypeWrapper[np.dtypes.Int16DType, np.int16]): +class Int16(IntWrapperBase[np.dtypes.Int16DType, np.int16]): name = "int16" - - -register_data_type(Int16) + default_value = np.int16(0) @dataclass(frozen=True, kw_only=True) -class UInt16(DTypeWrapper[np.dtypes.UInt16DType, np.uint16]): +class UInt16(IntWrapperBase[np.dtypes.UInt16DType, np.uint16]): name = "uint16" - -register_data_type(UInt16) + default_value = np.uint16(0) @dataclass(frozen=True, kw_only=True) -class 
Int32(DTypeWrapper[np.dtypes.Int32DType, np.int32]): +class Int32(IntWrapperBase[np.dtypes.Int32DType, np.int32]): name = "int32" - - -register_data_type(Int32) + default_value = np.int32(0) @dataclass(frozen=True, kw_only=True) -class UInt32(DTypeWrapper[np.dtypes.UInt32DType, np.uint32]): +class UInt32(IntWrapperBase[np.dtypes.UInt32DType, np.uint32]): name = "uint32" - -register_data_type(UInt32) + default_value = np.uint32(0) @dataclass(frozen=True, kw_only=True) -class Int64(DTypeWrapper[np.dtypes.Int64DType, np.int64]): +class Int64(IntWrapperBase[np.dtypes.Int64DType, np.int64]): name = "int64" - - -register_data_type(Int64) + default_value = np.int64(0) @dataclass(frozen=True, kw_only=True) -class UInt64(DTypeWrapper[np.dtypes.UInt64DType, np.uint64]): +class UInt64(IntWrapperBase[np.dtypes.UInt64DType, np.uint64]): name = "uint64" + default_value = np.uint64(0) - -register_data_type(UInt64) - - -class FloatBase(DTypeWrapper[TDType, TScalar]): +class FloatWrapperBase(DTypeWrapper[TDType, TScalar]): kind = "numeric" - default = 0.0 + + @classmethod + def from_dtype(cls, dtype: TDType) -> Self: + return cls() def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> JSONFloat: return float_to_json(data, zarr_format) @@ -295,39 +344,38 @@ def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> JSONFloat: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> TScalar: - if check_json_float(data): - return self.to_numpy(endianness=endianness).type(float_from_json)(data, zarr_format) + if check_json_float_v2(data): + return self.to_dtype(endianness=endianness).type(float_from_json(data, zarr_format)) raise TypeError(f"Invalid type: {data}. 
Expected a float.") + @dataclass(frozen=True, kw_only=True) -class Float16(DTypeWrapper[np.dtypes.Float16DType, np.float16]): +class Float16(FloatWrapperBase[np.dtypes.Float16DType, np.float16]): name = "float16" - - -register_data_type(Float16) + default_value = np.float16(0) @dataclass(frozen=True, kw_only=True) -class Float32(DTypeWrapper[np.dtypes.Float32DType, np.float32]): +class Float32(FloatWrapperBase[np.dtypes.Float32DType, np.float32]): name = "float32" - - -register_data_type(Float32) + default_value = np.float32(0) @dataclass(frozen=True, kw_only=True) -class Float64(DTypeWrapper[np.dtypes.Float64DType, np.float64]): +class Float64(FloatWrapperBase[np.dtypes.Float64DType, np.float64]): name = "float64" - - -register_data_type(Float64) + default_value = np.float64(0) @dataclass(frozen=True, kw_only=True) class Complex64(DTypeWrapper[np.dtypes.Complex64DType, np.complex64]): name = "complex64" kind = "numeric" - default = 0.0 + 0.0j + default_value = np.complex64(0) + + @classmethod + def from_dtype(cls, dtype: np.dtypes.Complex64DType) -> Self: + return cls() def to_json_value( self, data: np.generic, zarr_format: ZarrFormat @@ -337,22 +385,22 @@ def to_json_value( def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.complex64: - if check_json_complex_float(data): + if check_json_complex_float_v3(data): return complex_from_json( - data, dtype=self.to_numpy(endianness=endianness), zarr_format=zarr_format + data, dtype=self.to_dtype(endianness=endianness), zarr_format=zarr_format ) raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") -register_data_type(Complex64) - - @dataclass(frozen=True, kw_only=True) class Complex128(DTypeWrapper[np.dtypes.Complex128DType, np.complex128]): name = "complex128" kind = "numeric" - dtype_cls = np.dtypes.Complex128DType - default = 0.0 + 0.0j + default_value = np.complex128(0) + + @classmethod + def from_dtype(cls, dtype: np.dtypes.Complex128DType) -> Self: + return cls() def to_json_value( self, data: np.generic, zarr_format: ZarrFormat @@ -362,28 +410,36 @@ def to_json_value( def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.complex128: - if check_json_complex_float(data): + if check_json_complex_float_v3(data): return complex_from_json( - data, dtype=self.to_numpy(endianness=endianness), zarr_format=zarr_format + data, dtype=self.to_dtype(endianness=endianness), zarr_format=zarr_format ) raise TypeError(f"Invalid type: {data}. Expected a complex float.") -register_data_type(Complex128) +@dataclass(frozen=True, kw_only=True) +class FlexibleWrapperBase(DTypeWrapper[TDType, TScalar]): + item_size_bits: ClassVar[int] + length: int + + @classmethod + def from_dtype(cls, dtype: TDType) -> Self: + return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) + + def to_dtype(self, endianness: Endianness | None = None) -> TDType: + endianness_code = endianness_to_numpy_str(endianness) + return self.dtype_cls(self.length).newbyteorder(endianness_code) @dataclass(frozen=True, kw_only=True) -class StaticByteString(DTypeWrapper[np.dtypes.BytesDType, np.bytes_], Flexible): +class StaticByteString(FlexibleWrapperBase[np.dtypes.BytesDType, np.bytes_]): name = "numpy/static_byte_string" kind = "string" - default = b"" + default_value = b"" + item_size_bits = 8 def to_dict(self) -> dict[str, JSON]: - return {"name": self.name, "configuration": {"capacity": self.length}} - - def to_numpy(self, endianness: Endianness | None = "native") -> np.dtypes.BytesDType: - endianness_code = 
endianness_to_numpy_str(endianness) - return self.dtype_cls(self.length).newbyteorder(endianness_code) + return {"name": self.name, "configuration": {"length": self.length}} def to_json_value( self, data: np.generic, *, zarr_format: ZarrFormat, endianness: Endianness | None = None @@ -394,28 +450,25 @@ def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.bytes_: if check_json_bool(data): - return self.to_numpy(endianness=endianness).type(data.encode("ascii")) + return self.to_dtype(endianness=endianness).type(data.encode("ascii")) raise TypeError(f"Invalid type: {data}. Expected a string.") -register_data_type(StaticByteString) - - @dataclass(frozen=True, kw_only=True) -class StaticRawBytes(DTypeWrapper[np.dtypes.VoidDType, np.void], Flexible): +class StaticRawBytes(FlexibleWrapperBase[np.dtypes.VoidDType, np.void]): name = "r*" kind = "bytes" - default = b"" - + default_value = np.void(b"") + item_size_bits = 8 def to_dict(self) -> dict[str, JSON]: - return {"name": f"r{self.length * 8}"} + return {"name": f"r{self.length * self.item_size_bits}"} - def to_numpy(self, endianness: Endianness | None = "native") -> np.dtypes.VoidDType: + def to_dtype(self, endianness: Endianness | None = None) -> np.dtypes.VoidDType: # this needs to be overridden because numpy does not allow creating a void type # by invoking np.dtypes.VoidDType directly endianness_code = endianness_to_numpy_str(endianness) - return np.dtype(f'{endianness_code}V{self.length}') + return np.dtype(f"{endianness_code}V{self.length}") def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> tuple[int, ...]: return tuple(*data.tobytes()) @@ -424,10 +477,29 @@ def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.void: # todo: check that this is well-formed - return self.to_numpy(endianness=endianness).type(bytes(data)) + return 
self.to_dtype(endianness=endianness).type(bytes(data)) + + +@dataclass(frozen=True, kw_only=True) +class StaticUnicodeString(FlexibleWrapperBase[np.dtypes.StrDType, np.str_]): + name = "numpy/static_unicode_string" + kind = "string" + default_value = np.str_("") + item_size_bits = 32 # UCS4 is 32 bits per code point + def to_dict(self) -> dict[str, JSON]: + return {"name": self.name, "configuration": {"length": self.length}} + + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: + return str(data) + + def from_json_value( + self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None + ) -> np.str_: + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. Expected a string.") + return self.to_dtype(endianness=endianness).type(data) -register_data_type(StaticRawBytes) if _NUMPY_SUPPORTS_VLEN_STRING: @@ -435,15 +507,16 @@ def from_json_value( class VlenString(DTypeWrapper[np.dtypes.StringDType, str]): name = "numpy/vlen_string" kind = "string" - dtype_cls = np.dtypes.StringDType - default = "" + default_value = "" + + @classmethod + def from_dtype(cls, dtype: np.dtypes.StringDType) -> Self: + return cls() def to_dict(self) -> dict[str, JSON]: return {"name": self.name} - def to_numpy( - self, endianness: Endianness | None = "native" - ) -> np.dtypes.StringDType: + def to_dtype(self, endianness: Endianness | None = None) -> np.dtypes.StringDType: endianness_code = endianness_to_numpy_str(endianness) return np.dtype(endianness_code + self.numpy_character_code) @@ -453,7 +526,7 @@ def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> str: - return self.to_numpy(endianness=endianness).type(data) + return self.to_dtype(endianness=endianness).type(data) else: @@ -461,58 +534,29 @@ def from_json_value( class VlenString(DTypeWrapper[np.dtypes.ObjectDType, str]): name = 
"numpy/vlen_string" kind = "string" - dtype_cls = np.dtypes.ObjectDType - default = "" + default_value = np.object_("") def to_dict(self) -> dict[str, JSON]: return {"name": self.name} - def to_numpy( - self, endianness: Endianness | None = None - ) -> np.dtype[np.dtypes.ObjectDType]: - return super().to_numpy(endianness=endianness) + @classmethod + def from_dtype(cls, dtype: np.dtypes.ObjectDType) -> Self: + return cls() + + def to_dtype(self, endianness: Endianness | None = None) -> np.dtype[np.dtypes.ObjectDType]: + return super().to_dtype(endianness=endianness) - def to_json_value(self, data, *, zarr_format: ZarrFormat) -> str: + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return str(data) def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> str: - return self.to_numpy(endianness=endianness).type(data) - - -register_data_type(VlenString) - - -@dataclass(frozen=True, kw_only=True) -class StaticUnicodeString(DTypeWrapper[np.dtypes.StrDType, np.str_], Flexible): - name = "numpy/static_unicode_string" - kind = "string" - default = "" - - def to_dict(self) -> dict[str, JSON]: - return {"name": self.name, "configuration": {"capacity": self.length}} - - def to_numpy(self, endianness: Endianness | None = "native") -> np.dtypes.StrDType: - endianness_code = endianness_to_numpy_str(endianness) - return np.dtype(endianness_code + self.numpy_character_code + str(self.length)) - - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: - return str(data) - - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.str_: - if not check_json_bool(data): - raise TypeError(f"Invalid type: {data}. 
Expected a string.") - return self.to_numpy(endianness=endianness).type(data) - - -register_data_type(StaticUnicodeString) + return self.to_dtype(endianness=endianness).type(data) def resolve_dtype(dtype: npt.DTypeLike | DTypeWrapper | dict[str, JSON]) -> DTypeWrapper: - from zarr.registry import get_data_type_from_numpy + from zarr.registry import get_data_type_from_dict, get_data_type_from_numpy if isinstance(dtype, DTypeWrapper): return dtype @@ -526,3 +570,7 @@ def resolve_dtype(dtype: npt.DTypeLike | DTypeWrapper | dict[str, JSON]) -> DTyp FLOAT_DTYPE = Float16 | Float32 | Float64 COMPLEX_DTYPE = Complex64 | Complex128 STRING_DTYPE = StaticUnicodeString | VlenString | StaticByteString +for dtype in get_args( + Bool | INTEGER_DTYPE | FLOAT_DTYPE | COMPLEX_DTYPE | STRING_DTYPE | StaticRawBytes +): + register_data_type(dtype) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 6d2f8d35e7..3966b0d72c 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -266,14 +266,14 @@ def __init__( chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) - fill_value_parsed = data_type.to_numpy().type(fill_value) + fill_value_parsed = data_type.to_dtype().type(fill_value) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) storage_transformers_parsed = parse_storage_transformers(storage_transformers) array_spec = ArraySpec( shape=shape_parsed, - dtype=data_type.to_numpy(), + dtype=data_type.to_dtype(), fill_value=fill_value_parsed, config=ArrayConfig.from_dict({}), # TODO: config is not needed here. prototype=default_buffer_prototype(), # TODO: prototype is not needed here. 
@@ -308,13 +308,13 @@ def _validate_metadata(self) -> None: raise ValueError("`fill_value` is required.") for codec in self.codecs: codec.validate( - shape=self.shape, dtype=self.data_type.to_numpy(), chunk_grid=self.chunk_grid + shape=self.shape, dtype=self.data_type.to_dtype(), chunk_grid=self.chunk_grid ) @property def dtype(self) -> np.dtype[Any]: """Interpret Zarr dtype as NumPy dtype""" - return self.data_type.to_numpy() + return self.data_type.to_dtype() @property def ndim(self) -> int: diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 1b8ecc7a92..997174de77 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -72,12 +72,10 @@ def lazy_load(self) -> None: self.lazy_load_list.clear() - def register(self: Self, cls: type[DTypeWrapper], clobber: bool = False) -> None: - if cls.name in self.contents and not clobber: - raise ValueError( - f"Data type {cls.name} already registered. Use clobber=True to overwrite." - ) - self.contents[cls.name] = cls + def register(self: Self, cls: type[DTypeWrapper]) -> None: + # don't register the same dtype twice + if cls.name not in self.contents or self.contents[cls.name] != cls: + self.contents[cls.name] = cls def get(self, key: str) -> type[DTypeWrapper]: return self.contents[key] @@ -351,8 +349,8 @@ def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper: np_dtype = np.dtype(dtype) __data_type_registry.lazy_load() for val in __data_type_registry.contents.values(): - if val.numpy_character_code == np_dtype.char: - return val.from_str(np_dtype) + if val.dtype_cls is type(np_dtype): + return val.from_dtype(np_dtype) raise ValueError( f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__data_type_registry.contents)}." 
) diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py index 8aeea834ce..f73b5e1969 100644 --- a/tests/test_codecs/test_vlen.py +++ b/tests/test_codecs/test_vlen.py @@ -50,15 +50,15 @@ def test_vlen_string( a[:, :] = data assert np.array_equal(data, a[:, :]) - assert a.metadata.data_type == get_data_type_from_numpy(dtype) - assert a.dtype == expected_array_string_dtype + assert a.metadata.data_type == get_data_type_from_numpy(data.dtype) + assert a.dtype == data.dtype # test round trip b = Array.open(sp) assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy assert np.array_equal(data, b[:, :]) - assert b.metadata.data_type == get_data_type_from_numpy(dtype) - assert a.dtype == expected_array_string_dtype + assert b.metadata.data_type == get_data_type_from_numpy(data.dtype) + assert a.dtype == data.dtype @pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 41d8b9a4d5..b5ca92c568 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -12,12 +12,13 @@ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.group import GroupMetadata, parse_node_type -from zarr.core.metadata.dtype import Flexible, complex_from_json +from zarr.core.metadata.dtype import FlexibleWrapperBase, complex_from_json from zarr.core.metadata.v3 import ( ArrayV3Metadata, parse_dimension_names, parse_zarr_format, ) +from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING from zarr.errors import MetadataValidationError from zarr.registry import get_data_type_from_numpy @@ -53,9 +54,13 @@ ) complex_dtypes = ("complex64", "complex128") -vlen_dtypes = ("string", "bytes") +flexible_dtypes = ("str", "bytes", 'void') +if _NUMPY_SUPPORTS_VLEN_STRING: + vlen_string_dtypes = ("T","O") +else: + vlen_string_dtypes = ("O") -dtypes = (*bool_dtypes, *int_dtypes, 
*float_dtypes, *complex_dtypes, *vlen_dtypes) +dtypes = (*bool_dtypes, *int_dtypes, *float_dtypes, *complex_dtypes, *flexible_dtypes, *vlen_string_dtypes) @pytest.mark.parametrize("data", [None, 1, 2, 4, 5, "3"]) @@ -116,7 +121,7 @@ def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: """ zarr_format = 3 dtype = get_data_type_from_numpy(dtype_str) - expected = dtype.to_numpy().type(complex(*fill_value)) + expected = dtype.to_dtype().type(complex(*fill_value)) observed = dtype.from_json_value(fill_value, zarr_format=zarr_format) assert observed == expected assert dtype.to_json_value(observed, zarr_format=zarr_format) == tuple(fill_value) @@ -325,11 +330,7 @@ async def test_special_float_fill_values(fill_value: str) -> None: @pytest.mark.parametrize("dtype_str", dtypes) def test_dtypes(dtype_str: str) -> None: dt = get_data_type_from_numpy(dtype_str) - np_dtype = dt.to_numpy() + np_dtype = dt.to_dtype() + assert isinstance(np_dtype, dt.dtype_cls) + assert np_dtype.type(0) == dt.cast_value(0) - if not isinstance(dt, Flexible): - assert dt.item_size == np_dtype.itemsize - else: - assert dt.length == np_dtype.itemsize - - assert dt.numpy_character_code == np_dtype.char From 24930b330bebaba1263d3daa33581566dc02e4c8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 2 Mar 2025 21:55:27 +0100 Subject: [PATCH 011/130] remove unused code --- src/zarr/core/dtype/__init__.py | 3 - src/zarr/core/dtype/core.py | 196 -------------------------------- src/zarr/registry.py | 69 +---------- 3 files changed, 2 insertions(+), 266 deletions(-) delete mode 100644 src/zarr/core/dtype/__init__.py delete mode 100644 src/zarr/core/dtype/core.py diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py deleted file mode 100644 index 58b884ff23..0000000000 --- a/src/zarr/core/dtype/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from zarr.core.dtype.core import ZarrDType - -__all__ = ["ZarrDType"] diff --git a/src/zarr/core/dtype/core.py 
b/src/zarr/core/dtype/core.py deleted file mode 100644 index c6460706aa..0000000000 --- a/src/zarr/core/dtype/core.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -# Overview - -This module provides a proof-of-concept standalone interface for managing dtypes in the zarr-python codebase. - -The `ZarrDType` class introduced in this module effectively acts as a replacement for `np.dtype` throughout the -zarr-python codebase. It attempts to encapsulate all relevant runtime information necessary for working with -dtypes in the context of the Zarr V3 specification (e.g. is this a core dtype or not, how many bytes and what -endianness is the dtype etc). By providing this abstraction, the module aims to: - -- Simplify dtype management within zarr-python -- Support runtime flexibility and custom extensions -- Remove unnecessary dependencies on the numpy API - -## Extensibility - -The module attempts to support user-driven extensions, allowing developers to introduce custom dtypes -without requiring immediate changes to zarr-python. Extensions can leverage the current entrypoint mechanism, -enabling integration of experimental features. Over time, widely adopted extensions may be formalized through -inclusion in zarr-python or standardized via a Zarr Enhancement Proposal (ZEP), but this is not essential. - -## Examples - -### Core `dtype` Registration - -The following example demonstrates how to register a built-in `dtype` in the core codebase: - -```python -from zarr.core.dtype import ZarrDType -from zarr.registry import register_v3dtype - -class Float16(ZarrDType): - zarr_spec_format = "3" - experimental = False - endianness = "little" - byte_count = 2 - to_numpy = np.dtype('float16') - -register_v3dtype(Float16) -``` - -### Entrypoint Extension - -The following example demonstrates how users can register a new `bfloat16` dtype for Zarr. -This approach adheres to the existing Zarr entrypoint pattern as much as possible, ensuring -consistency with other extensions. 
The code below would typically be part of a Python package -that specifies the entrypoints for the extension: - -```python -import ml_dtypes -from zarr.core.dtype import ZarrDType # User inherits from ZarrDType when creating their dtype - -class Bfloat16(ZarrDType): - zarr_spec_format = "3" - experimental = True - endianness = "little" - byte_count = 2 - to_numpy = np.dtype('bfloat16') # Enabled by importing ml_dtypes - configuration_v3 = { - "version": "example_value", - "author": "example_value", - "ml_dtypes_version": "example_value" - } -``` - -### dtype lookup - -The following examples demonstrate how to perform a lookup for the relevant ZarrDType, given -a string that matches the dtype Zarr specification ID, or a numpy dtype object: - -``` -from zarr.registry import get_v3dtype_class, get_v3dtype_class_from_numpy - -get_v3dtype_class('complex64') # returns little-endian Complex64 ZarrDType -get_v3dtype_class('not_registered_dtype') # ValueError - -get_v3dtype_class_from_numpy('>i2') # returns big-endian Int16 ZarrDType -get_v3dtype_class_from_numpy(np.dtype('float32')) # returns little-endian Float32 ZarrDType -get_v3dtype_class_from_numpy('i10') # ValueError -``` - -### String dtypes - -The following indicates one possibility for supporting variable-length strings. It is via the -entrypoint mechanism as in a previous example. 
The Apache Arrow specification does not currently -include a dtype for fixed-length strings (only for fixed-length bytes) and so I am using string -here to implicitly refer to a variable-length string data (there may be some subtleties with codecs -that means this needs to be refined further): - -```python -import numpy as np -from zarr.core.dtype import ZarrDType # User inherits from ZarrDType when creating their dtype - -try: - to_numpy = np.dtypes.StringDType() -except AttributeError: - to_numpy = np.dtypes.ObjectDType() - -class String(ZarrDType): - zarr_spec_format = "3" - experimental = True - endianness = 'little' - byte_count = None # None is defined to mean variable - to_numpy = to_numpy -``` - -### int4 dtype - -There is currently considerable interest in the AI community in 'quantising' models - storing -models at reduced precision, while minimising loss of information content. There are a number -of sub-byte dtypes that the community are using e.g. int4. Unfortunately numpy does not -currently have support for handling such sub-byte dtypes in an easy way. 
However, they can -still be held in a numpy array and then passed (in a zero-copy way) to something like pytorch -which can handle appropriately: - -```python -import numpy as np -from zarr.core.dtype import ZarrDType # User inherits from ZarrDType when creating their dtype - -class Int4(ZarrDType): - zarr_spec_format = "3" - experimental = True - endianness = 'little' - byte_count = 1 # this is ugly, but I could change this from byte_count to bit_count if there was consensus - to_numpy = np.dtype('B') # could also be np.dtype('V1'), but this would prevent bit-twiddling - configuration_v3 = { - "version": "example_value", - "author": "example_value", - } -``` -""" - -from __future__ import annotations - -from typing import Any, Literal - -import numpy as np - - -class FrozenClassVariables(type): - def __setattr__(cls, attr: str, value: object) -> None: - if hasattr(cls, attr): - raise ValueError(f"Attribute {attr} on ZarrDType class can not be changed once set.") - else: - raise AttributeError(f"'{cls}' object has no attribute '{attr}'") - - -class ZarrDType(metaclass=FrozenClassVariables): - zarr_spec_format: Literal["2", "3"] # the version of the zarr spec used - experimental: bool # is this in the core spec or not - endianness: Literal[ - "big", "little", None - ] # None indicates not defined i.e. single byte or byte strings - byte_count: int | None # None indicates variable count - to_numpy: np.dtype[Any] # may involve installing a a numpy extension e.g. 
ml_dtypes; - - configuration_v3: dict | None # TODO: understand better how this is recommended by the spec - - _zarr_spec_identifier: str # implementation detail used to map to core spec - - def __init_subclass__( # enforces all required fields are set and basic sanity checks - cls, - **kwargs, - ) -> None: - required_attrs = [ - "zarr_spec_format", - "experimental", - "endianness", - "byte_count", - "to_numpy", - ] - for attr in required_attrs: - if not hasattr(cls, attr): - raise ValueError(f"{attr} is a required attribute for a Zarr dtype.") - - if not hasattr(cls, "configuration_v3"): - cls.configuration_v3 = None - - cls._zarr_spec_identifier = ( - "big_" + cls.__qualname__.lower() - if cls.endianness == "big" - else cls.__qualname__.lower() - ) # how this dtype is identified in core spec; convention is prefix with big_ for big-endian - - cls._validate() # sanity check on basic requirements - - super().__init_subclass__(**kwargs) - - # TODO: add further checks - @classmethod - def _validate(cls): - if cls.byte_count is not None and cls.byte_count <= 0: - raise ValueError("byte_count must be a positive integer.") - - if cls.byte_count == 1 and cls.endianness is not None: - raise ValueError("Endianness must be None for single-byte types.") diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 997174de77..373e118e78 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -24,7 +24,6 @@ ) from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON - from zarr.core.dtype import ZarrDType from zarr.core.metadata.dtype import DTypeWrapper __all__ = [ @@ -33,14 +32,10 @@ "get_codec_class", "get_ndbuffer_class", "get_pipeline_class", - "get_v2dtype_class", - "get_v3dtype_class", "register_buffer", "register_codec", "register_ndbuffer", "register_pipeline", - "register_v2dtype", - "register_v3dtype", ] T = TypeVar("T") @@ -86,8 +81,6 @@ def get(self, key: str) -> type[DTypeWrapper]: __buffer_registry: Registry[Buffer] = Registry() 
__ndbuffer_registry: Registry[NDBuffer] = Registry() __data_type_registry = DataTypeRegistry() -__v3_dtype_registry: Registry[ZarrDType] = Registry() -__v2_dtype_registry: Registry[ZarrDType] = Registry() """ The registry module is responsible for managing implementations of codecs, @@ -124,13 +117,9 @@ def _collect_entrypoints() -> list[Registry[Any]]: __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.ndbuffer")) __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="ndbuffer")) - # __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr.data_type")) - # __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="data_type")) + __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr.data_type")) + __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="data_type")) - __v3_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr.v3dtype")) - __v3_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="v3dtype")) - __v2_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr.v2dtype")) - __v2_dtype_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="v2dtype")) __pipeline_registry.lazy_load_list.extend(entry_points.select(group="zarr.codec_pipeline")) __pipeline_registry.lazy_load_list.extend( entry_points.select(group="zarr", name="codec_pipeline") @@ -180,14 +169,6 @@ def register_data_type(cls: type[DTypeWrapper]) -> None: __data_type_registry.register(cls) -def register_v3dtype(cls: type[ZarrDType]) -> None: - __v3_dtype_registry.register(cls) - - -def register_v2dtype(cls: type[ZarrDType]) -> None: - __v2_dtype_registry.register(cls) - - def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: if reload_config: _reload_config() @@ -356,50 +337,4 @@ def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper: ) -# TODO: merge the 
get_vXdtype_class_ functions -# these can be used instead of the various parse_X functions (hopefully) -def get_v3dtype_class(dtype: str) -> type[ZarrDType]: - __v3_dtype_registry.lazy_load() - v3dtype_class = __v3_dtype_registry.get(dtype) - if v3dtype_class: - return v3dtype_class - raise ValueError( - f"ZarrDType class '{dtype}' not found in registered buffers: {list(__v3_dtype_registry)}." - ) - - -def get_v3dtype_class_from_numpy(dtype: npt.DTypeLike) -> type[ZarrDType]: - __v3_dtype_registry.lazy_load() - - dtype = np.dtype(dtype) - for val in __v3_dtype_registry.values(): - if dtype == val.to_numpy: - return val - raise ValueError( - f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__v3_dtype_registry)}." - ) - - -def get_v2dtype_class(dtype: str) -> type[ZarrDType]: - __v2_dtype_registry.lazy_load() - v2dtype_class = __v2_dtype_registry.get(dtype) - if v2dtype_class: - return v2dtype_class - raise ValueError( - f"ZarrDType class '{dtype}' not found in registered buffers: {list(__v2_dtype_registry)}." - ) - - -def get_v2dtype_class_from_numpy(dtype: npt.DTypeLike) -> type[ZarrDType]: - __v2_dtype_registry.lazy_load() - - dtype = np.dtype(dtype) - for val in __v2_dtype_registry.values(): - if dtype == val.to_numpy: - return val - raise ValueError( - f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__v2_dtype_registry)}." 
- ) - - _collect_entrypoints() From 703e0e16e96fb0ffb9a934b959f00d0e764be4df Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 2 Mar 2025 23:31:31 +0100 Subject: [PATCH 012/130] use wrap / unwrap instead of to_dtype / from_dtype; push into v2 codebase --- src/zarr/api/asynchronous.py | 13 +++---- src/zarr/codecs/_v2.py | 6 ++-- src/zarr/codecs/bytes.py | 2 +- src/zarr/core/array.py | 63 ++++++++++++++++----------------- src/zarr/core/array_spec.py | 16 ++++++--- src/zarr/core/buffer/cpu.py | 9 +++-- src/zarr/core/chunk_grids.py | 5 ++- src/zarr/core/common.py | 13 +++---- src/zarr/core/metadata/dtype.py | 58 +++++++++++++++--------------- src/zarr/core/metadata/v2.py | 59 ++++++++++-------------------- src/zarr/core/metadata/v3.py | 8 ++--- src/zarr/registry.py | 2 +- tests/conftest.py | 5 ++- tests/test_array.py | 12 +++++-- tests/test_metadata/test_v3.py | 24 ++++++++----- 15 files changed, 147 insertions(+), 148 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 792e445c9d..d8462b72ef 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -32,6 +32,7 @@ from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata from zarr.core.metadata.v2 import _default_compressor, _default_filters from zarr.errors import NodeTypeValidationError +from zarr.registry import get_data_type_from_numpy from zarr.storage._common import make_store_path if TYPE_CHECKING: @@ -428,11 +429,12 @@ async def save_array( shape = arr.shape chunks = getattr(arr, "chunks", None) # for array-likes with chunks attribute overwrite = kwargs.pop("overwrite", None) or _infer_overwrite(mode) + zarr_dtype = get_data_type_from_numpy(arr.dtype) new = await AsyncArray._create( store_path, zarr_format=zarr_format, shape=shape, - dtype=arr.dtype, + dtype=zarr_dtype, chunks=chunks, overwrite=overwrite, **kwargs, @@ -978,15 +980,14 @@ async def create( _handle_zarr_version_or_format(zarr_version=zarr_version, 
zarr_format=zarr_format) or _default_zarr_format() ) - + dtype_wrapped = parse_dtype(dtype, zarr_format=zarr_format) if zarr_format == 2: if chunks is None: chunks = shape - dtype = parse_dtype(dtype, zarr_format=zarr_format) if not filters: - filters = _default_filters(dtype) + filters = _default_filters(dtype_wrapped) if not compressor: - compressor = _default_compressor(dtype) + compressor = _default_compressor(dtype_wrapped) elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr] if chunks is not None: chunk_shape = chunks @@ -1051,7 +1052,7 @@ async def create( store_path, shape=shape, chunks=chunks, - dtype=dtype, + dtype=dtype_wrapped, compressor=compressor, fill_value=fill_value, overwrite=overwrite, diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py index 53edc1f4a1..e2f228f509 100644 --- a/src/zarr/codecs/_v2.py +++ b/src/zarr/codecs/_v2.py @@ -48,7 +48,7 @@ async def _decode_single( # segfaults and other bad things happening if chunk_spec.dtype != object: try: - chunk = chunk.view(chunk_spec.dtype) + chunk = chunk.view(chunk_spec.dtype.unwrap()) except TypeError: # this will happen if the dtype of the chunk # does not match the dtype of the array spec i.g. if @@ -56,7 +56,7 @@ async def _decode_single( # is an object array. In this case, we need to convert the object # array to the correct dtype. - chunk = np.array(chunk).astype(chunk_spec.dtype) + chunk = np.array(chunk).astype(chunk_spec.dtype.unwrap()) elif chunk.dtype != object: # If we end up here, someone must have hacked around with the filters. 
@@ -80,7 +80,7 @@ async def _encode_single( chunk = chunk_array.as_ndarray_like() # ensure contiguous and correct order - chunk = chunk.astype(chunk_spec.dtype, order=chunk_spec.order, copy=False) + chunk = chunk.astype(chunk_spec.dtype.unwrap(), order=chunk_spec.order, copy=False) # apply filters if self.filters: diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 78c7b22fbc..4875d8e8d8 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -56,7 +56,7 @@ def to_dict(self) -> dict[str, JSON]: return {"name": "bytes", "configuration": {"endian": self.endian.value}} def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: - if array_spec.dtype.itemsize == 0: + if array_spec.dtype.unwrap().itemsize == 0: if self.endian is not None: return replace(self, endian=None) elif self.endian is None: diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7edd467a54..3b1e6a973f 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -98,7 +98,7 @@ ArrayV3MetadataDict, T_ArrayMetadata, ) -from zarr.core.metadata.dtype import DTypeWrapper +from zarr.core.metadata.dtype import DTypeWrapper, VariableLengthString from zarr.core.metadata.v2 import ( _default_compressor, _default_filters, @@ -549,7 +549,7 @@ async def _create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike, + dtype: npt.DTypeLike[Any], zarr_format: ZarrFormat = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -578,18 +578,22 @@ async def _create( See :func:`AsyncArray.create` for more details. Deprecated in favor of :func:`zarr.api.asynchronous.create_array`. 
""" + # TODO: delete this and be more strict about where parsing occurs + if not isinstance(dtype, DTypeWrapper): + dtype_parsed = get_data_type_from_numpy(np.dtype(dtype)) + else: + dtype_parsed = dtype store_path = await make_store_path(store) - dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) shape = parse_shapelike(shape) if chunks is not None and chunk_shape is not None: raise ValueError("Only one of chunk_shape or chunks can be provided.") if chunks: - _chunks = normalize_chunks(chunks, shape, dtype_parsed.itemsize) + _chunks = normalize_chunks(chunks, shape, dtype_parsed.unwrap().itemsize) else: - _chunks = normalize_chunks(chunk_shape, shape, dtype_parsed.itemsize) + _chunks = normalize_chunks(chunk_shape, shape, dtype_parsed.unwrap().itemsize) config_parsed = parse_array_config(config) result: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] @@ -666,7 +670,7 @@ async def _create( @staticmethod def _create_metadata_v3( shape: ShapeLike, - dtype: np.dtype[Any], + dtype: DTypeWrapper[Any, Any], chunk_shape: ChunkCoords, fill_value: Any | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, @@ -694,19 +698,16 @@ def _create_metadata_v3( stacklevel=2, ) - # resolve the numpy dtype into zarr v3 datatype - zarr_data_type = get_data_type_from_numpy(dtype) - if fill_value is None: # v3 spec will not allow a null fill value - fill_value_parsed = zarr_data_type.default_value + fill_value_parsed = dtype.default_value else: fill_value_parsed = fill_value chunk_grid_parsed = RegularChunkGrid(chunk_shape=chunk_shape) return ArrayV3Metadata( shape=shape, - data_type=zarr_data_type, + data_type=dtype, chunk_grid=chunk_grid_parsed, chunk_key_encoding=chunk_key_encoding_parsed, fill_value=fill_value_parsed, @@ -769,7 +770,7 @@ async def _create_v3( @staticmethod def _create_metadata_v2( shape: ChunkCoords, - dtype: np.dtype[Any], + dtype: DTypeWrapper[Any, Any], chunks: ChunkCoords, order: MemoryOrder, dimension_separator: Literal[".", "/"] 
| None = None, @@ -781,10 +782,8 @@ def _create_metadata_v2( if dimension_separator is None: dimension_separator = "." - dtype = parse_dtype(dtype, zarr_format=2) - # inject VLenUTF8 for str dtype if not already present - if np.issubdtype(dtype, np.str_): + if isinstance(dtype, VariableLengthString): filters = filters or [] from numcodecs.vlen import VLenUTF8 @@ -793,7 +792,7 @@ def _create_metadata_v2( return ArrayV2Metadata( shape=shape, - dtype=np.dtype(dtype), + dtype=dtype, chunks=chunks, order=order, dimension_separator=dimension_separator, @@ -2046,7 +2045,7 @@ def dtype(self) -> np.dtype[Any]: np.dtype The NumPy data type. """ - return self._async_array.dtype + return self._async_array.dtype.unwrap() @property def attrs(self) -> Attributes: @@ -3919,7 +3918,7 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation - dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format) + dtype_wrapped = parse_dtype(dtype, zarr_format=zarr_format) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format @@ -3934,7 +3933,10 @@ async def init_array( await ensure_no_existing_node(store_path, zarr_format=zarr_format) shard_shape_parsed, chunk_shape_parsed = _auto_partition( - array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, dtype=dtype_parsed + array_shape=shape_parsed, + shard_shape=shards, + chunk_shape=chunks, + item_size=dtype_wrapped.unwrap().itemsize, ) chunks_out: tuple[int, ...] 
meta: ArrayV2Metadata | ArrayV3Metadata @@ -3950,9 +3952,8 @@ async def init_array( raise ValueError("Zarr format 2 arrays do not support `serializer`.") filters_parsed, compressor_parsed = _parse_chunk_encoding_v2( - compressor=compressors, filters=filters, dtype=np.dtype(dtype) + compressor=compressors, filters=filters, dtype=dtype_wrapped ) - if dimension_names is not None: raise ValueError("Zarr format 2 arrays do not support dimension names.") if order is None: @@ -3962,7 +3963,7 @@ async def init_array( meta = AsyncArray._create_metadata_v2( shape=shape_parsed, - dtype=dtype_parsed, + dtype=dtype_wrapped, chunks=chunk_shape_parsed, dimension_separator=chunk_key_encoding_parsed.separator, fill_value=fill_value, @@ -3976,7 +3977,7 @@ async def init_array( compressors=compressors, filters=filters, serializer=serializer, - dtype=dtype_parsed, + dtype=dtype_wrapped, ) sub_codecs = cast(tuple[Codec, ...], (*array_array, array_bytes, *bytes_bytes)) codecs_out: tuple[Codec, ...] @@ -3991,7 +3992,7 @@ async def init_array( ) sharding_codec.validate( shape=chunk_shape_parsed, - dtype=dtype_parsed, + dtype=dtype_wrapped, chunk_grid=RegularChunkGrid(chunk_shape=shard_shape_parsed), ) codecs_out = (sharding_codec,) @@ -4002,7 +4003,7 @@ async def init_array( meta = AsyncArray._create_metadata_v3( shape=shape_parsed, - dtype=dtype_parsed, + dtype=dtype_wrapped, fill_value=fill_value, chunk_shape=chunks_out, chunk_key_encoding=chunk_key_encoding_parsed, @@ -4210,12 +4211,11 @@ def _parse_chunk_key_encoding( def _get_default_chunk_encoding_v3( - np_dtype: np.dtype[Any], + dtype: DTypeWrapper[Any, Any], ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: """ Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype. 
""" - dtype = get_data_type_from_numpy(np_dtype) default_filters = zarr_config.get("array.v3_default_filters").get(dtype.kind) default_serializer = zarr_config.get("array.v3_default_serializer").get(dtype.kind) @@ -4229,14 +4229,14 @@ def _get_default_chunk_encoding_v3( def _get_default_chunk_encoding_v2( - np_dtype: np.dtype[Any], + dtype: DTypeWrapper[Any, Any], ) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]: """ Get the default chunk encoding for Zarr format 2 arrays, given a dtype """ - compressor_dict = _default_compressor(np_dtype) - filter_dicts = _default_filters(np_dtype) + compressor_dict = _default_compressor(dtype) + filter_dicts = _default_filters(dtype) compressor = None if compressor_dict is not None: @@ -4253,13 +4253,12 @@ def _parse_chunk_encoding_v2( *, compressor: CompressorsLike, filters: FiltersLike, - dtype: np.dtype[Any], + dtype: DTypeWrapper[Any, Any], ) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]: """ Generate chunk encoding classes for Zarr format 2 arrays with optional defaults. """ default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype) - _filters: tuple[numcodecs.abc.Codec, ...] 
| None _compressor: numcodecs.abc.Codec | None diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index 59d3cc6b40..cf92f11050 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -3,8 +3,6 @@ from dataclasses import dataclass, fields from typing import TYPE_CHECKING, Any, Literal, Self, TypedDict, cast -import numpy as np - from zarr.core.common import ( MemoryOrder, parse_bool, @@ -13,10 +11,14 @@ parse_shapelike, ) from zarr.core.config import config as zarr_config +from zarr.core.metadata.dtype import DTypeWrapper +from zarr.registry import get_data_type_from_numpy if TYPE_CHECKING: from typing import NotRequired + import numpy.typing as npt + from zarr.core.buffer import BufferPrototype from zarr.core.common import ChunkCoords @@ -90,7 +92,7 @@ def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig: @dataclass(frozen=True) class ArraySpec: shape: ChunkCoords - dtype: np.dtype[Any] + dtype: DTypeWrapper[Any, Any] fill_value: Any config: ArrayConfig prototype: BufferPrototype @@ -98,13 +100,17 @@ class ArraySpec: def __init__( self, shape: ChunkCoords, - dtype: np.dtype[Any], + dtype: npt.DtypeLike | DTypeWrapper[Any, Any], fill_value: Any, config: ArrayConfig, prototype: BufferPrototype, ) -> None: shape_parsed = parse_shapelike(shape) - dtype_parsed = np.dtype(dtype) + if not isinstance(dtype, DTypeWrapper): + dtype_parsed = get_data_type_from_numpy(dtype) + else: + dtype_parsed = dtype + fill_value_parsed = parse_fill_value(fill_value) object.__setattr__(self, "shape", shape_parsed) diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 225adb6f5c..b83f710747 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -10,6 +10,7 @@ import numpy.typing as npt from zarr.core.buffer import core +from zarr.core.metadata.dtype import DTypeWrapper from zarr.registry import ( register_buffer, register_ndbuffer, @@ -150,14 +151,18 @@ def create( cls, *, shape: 
Iterable[int], - dtype: npt.DTypeLike, + dtype: DTypeWrapper[Any, Any], order: Literal["C", "F"] = "C", fill_value: Any | None = None, ) -> Self: if fill_value is None: return cls(np.zeros(shape=tuple(shape), dtype=dtype, order=order)) else: - return cls(np.full(shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order)) + return cls( + np.full( + shape=tuple(shape), fill_value=fill_value, dtype=dtype.unwrap(), order=order + ) + ) @classmethod def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index d3e40c26ed..74bf9b6ba8 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -63,7 +63,7 @@ def _guess_chunks( """ if isinstance(shape, int): shape = (shape,) - + typesize = max(typesize, 8) ndims = len(shape) # require chunks to have non-zero length for all dimensions chunks = np.maximum(np.array(shape, dtype="=f8"), 1) @@ -204,7 +204,7 @@ def _auto_partition( array_shape: tuple[int, ...], chunk_shape: tuple[int, ...] | Literal["auto"], shard_shape: ShardsLike | None, - dtype: np.dtype[Any], + item_size: int, ) -> tuple[tuple[int, ...] | None, tuple[int, ...]]: """ Automatically determine the shard shape and chunk shape for an array, given the shape and dtype of the array. @@ -214,7 +214,6 @@ def _auto_partition( of the array; if the `chunk_shape` is also "auto", then the chunks will be set heuristically as well, given the dtype and shard shape. Otherwise, the chunks will be returned as-is. """ - item_size = dtype.itemsize if shard_shape is None: _shards_out: None | tuple[int, ...] 
= None if chunk_shape == "auto": diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index e398eff406..85dadc2b53 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -19,7 +19,6 @@ import numpy as np from zarr.core.config import config as zarr_config -from zarr.core.strings import _VLEN_STRING_DTYPE if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Iterator @@ -167,14 +166,10 @@ def parse_bool(data: Any) -> bool: raise ValueError(f"Expected bool, got {data} instead.") -def parse_dtype(dtype: Any, zarr_format: ZarrFormat) -> np.dtype[Any]: - if dtype is str or dtype == "str": - if zarr_format == 2: - # special case as object - return np.dtype("object") - else: - return _VLEN_STRING_DTYPE - return np.dtype(dtype) +def parse_dtype(dtype: Any, zarr_format: ZarrFormat) -> DTypeWrapper[Any, Any]: + from zarr.registry import get_data_type_from_numpy + + return get_data_type_from_numpy(np.dtype(dtype)) def _warn_write_empty_chunks_kwarg() -> None: diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 1b57831943..5d382076b4 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -216,14 +216,14 @@ def to_dict(self) -> dict[str, JSON]: return {"name": self.name} def cast_value(self: Self, value: object, *, endianness: Endianness | None = None) -> TScalar: - return cast(np.generic, self.to_dtype(endianness=endianness).type(value)) + return cast(np.generic, self.unwrap(endianness=endianness).type(value)) @classmethod @abstractmethod - def from_dtype(cls: type[Self], dtype: TDType) -> Self: + def wrap(cls: type[Self], dtype: TDType) -> Self: raise NotImplementedError - def to_dtype(self: Self, *, endianness: Endianness | None = None) -> TDType: + def unwrap(self: Self, *, endianness: Endianness | None = None) -> TDType: endian_str = endianness_to_numpy_str(endianness) return self.dtype_cls().newbyteorder(endian_str) @@ -251,7 +251,7 @@ class 
Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): default_value = np.False_ @classmethod - def from_dtype(cls, dtype: np.dtypes.BoolDType) -> Self: + def wrap(cls, dtype: np.dtypes.BoolDType) -> Self: return cls() def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> bool: @@ -261,7 +261,7 @@ def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.bool_: if check_json_bool(data): - return self.to_dtype(endianness=endianness).type(data) + return self.unwrap(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") @@ -269,7 +269,7 @@ class IntWrapperBase(DTypeWrapper[TDType, TScalar]): kind = "numeric" @classmethod - def from_dtype(cls, dtype: TDType) -> Self: + def wrap(cls, dtype: TDType) -> Self: return cls() def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: @@ -279,7 +279,7 @@ def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> TScalar: if check_json_int(data): - return self.to_dtype(endianness=endianness).type(data) + return self.unwrap(endianness=endianness).type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -335,7 +335,7 @@ class FloatWrapperBase(DTypeWrapper[TDType, TScalar]): kind = "numeric" @classmethod - def from_dtype(cls, dtype: TDType) -> Self: + def wrap(cls, dtype: TDType) -> Self: return cls() def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> JSONFloat: @@ -345,7 +345,7 @@ def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> TScalar: if check_json_float_v2(data): - return self.to_dtype(endianness=endianness).type(float_from_json(data, zarr_format)) + return self.unwrap(endianness=endianness).type(float_from_json(data, zarr_format)) raise TypeError(f"Invalid type: {data}. 
Expected a float.") @@ -374,7 +374,7 @@ class Complex64(DTypeWrapper[np.dtypes.Complex64DType, np.complex64]): default_value = np.complex64(0) @classmethod - def from_dtype(cls, dtype: np.dtypes.Complex64DType) -> Self: + def wrap(cls, dtype: np.dtypes.Complex64DType) -> Self: return cls() def to_json_value( @@ -387,7 +387,7 @@ def from_json_value( ) -> np.complex64: if check_json_complex_float_v3(data): return complex_from_json( - data, dtype=self.to_dtype(endianness=endianness), zarr_format=zarr_format + data, dtype=self.unwrap(endianness=endianness), zarr_format=zarr_format ) raise TypeError(f"Invalid type: {data}. Expected a complex float.") @@ -399,7 +399,7 @@ class Complex128(DTypeWrapper[np.dtypes.Complex128DType, np.complex128]): default_value = np.complex128(0) @classmethod - def from_dtype(cls, dtype: np.dtypes.Complex128DType) -> Self: + def wrap(cls, dtype: np.dtypes.Complex128DType) -> Self: return cls() def to_json_value( @@ -412,7 +412,7 @@ def from_json_value( ) -> np.complex128: if check_json_complex_float_v3(data): return complex_from_json( - data, dtype=self.to_dtype(endianness=endianness), zarr_format=zarr_format + data, dtype=self.unwrap(endianness=endianness), zarr_format=zarr_format ) raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") @@ -423,10 +423,10 @@ class FlexibleWrapperBase(DTypeWrapper[TDType, TScalar]): length: int @classmethod - def from_dtype(cls, dtype: TDType) -> Self: + def wrap(cls, dtype: TDType) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) - def to_dtype(self, endianness: Endianness | None = None) -> TDType: + def unwrap(self, endianness: Endianness | None = None) -> TDType: endianness_code = endianness_to_numpy_str(endianness) return self.dtype_cls(self.length).newbyteorder(endianness_code) @@ -450,7 +450,7 @@ def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.bytes_: if check_json_bool(data): - return self.to_dtype(endianness=endianness).type(data.encode("ascii")) + return self.unwrap(endianness=endianness).type(data.encode("ascii")) raise TypeError(f"Invalid type: {data}. Expected a string.") @@ -464,7 +464,7 @@ class StaticRawBytes(FlexibleWrapperBase[np.dtypes.VoidDType, np.void]): def to_dict(self) -> dict[str, JSON]: return {"name": f"r{self.length * self.item_size_bits}"} - def to_dtype(self, endianness: Endianness | None = None) -> np.dtypes.VoidDType: + def unwrap(self, endianness: Endianness | None = None) -> np.dtypes.VoidDType: # this needs to be overridden because numpy does not allow creating a void type # by invoking np.dtypes.VoidDType directly endianness_code = endianness_to_numpy_str(endianness) @@ -477,7 +477,7 @@ def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> np.void: # todo: check that this is well-formed - return self.to_dtype(endianness=endianness).type(bytes(data)) + return self.unwrap(endianness=endianness).type(bytes(data)) @dataclass(frozen=True, kw_only=True) @@ -498,25 +498,25 @@ def from_json_value( ) -> np.str_: if not check_json_str(data): raise TypeError(f"Invalid type: {data}. 
Expected a string.") - return self.to_dtype(endianness=endianness).type(data) + return self.unwrap(endianness=endianness).type(data) if _NUMPY_SUPPORTS_VLEN_STRING: @dataclass(frozen=True, kw_only=True) - class VlenString(DTypeWrapper[np.dtypes.StringDType, str]): + class VariableLengthString(DTypeWrapper[np.dtypes.StringDType, str]): name = "numpy/vlen_string" kind = "string" default_value = "" @classmethod - def from_dtype(cls, dtype: np.dtypes.StringDType) -> Self: + def wrap(cls, dtype: np.dtypes.StringDType) -> Self: return cls() def to_dict(self) -> dict[str, JSON]: return {"name": self.name} - def to_dtype(self, endianness: Endianness | None = None) -> np.dtypes.StringDType: + def unwrap(self, endianness: Endianness | None = None) -> np.dtypes.StringDType: endianness_code = endianness_to_numpy_str(endianness) return np.dtype(endianness_code + self.numpy_character_code) @@ -526,12 +526,12 @@ def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> str: - return self.to_dtype(endianness=endianness).type(data) + return self.unwrap(endianness=endianness).type(data) else: @dataclass(frozen=True, kw_only=True) - class VlenString(DTypeWrapper[np.dtypes.ObjectDType, str]): + class VariableLengthString(DTypeWrapper[np.dtypes.ObjectDType, str]): name = "numpy/vlen_string" kind = "string" default_value = np.object_("") @@ -540,11 +540,11 @@ def to_dict(self) -> dict[str, JSON]: return {"name": self.name} @classmethod - def from_dtype(cls, dtype: np.dtypes.ObjectDType) -> Self: + def wrap(cls, dtype: np.dtypes.ObjectDType) -> Self: return cls() - def to_dtype(self, endianness: Endianness | None = None) -> np.dtype[np.dtypes.ObjectDType]: - return super().to_dtype(endianness=endianness) + def unwrap(self, endianness: Endianness | None = None) -> np.dtype[np.dtypes.ObjectDType]: + return super().unwrap(endianness=endianness) def 
to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return str(data) @@ -552,7 +552,7 @@ def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: def from_json_value( self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None ) -> str: - return self.to_dtype(endianness=endianness).type(data) + return self.unwrap(endianness=endianness).type(data) def resolve_dtype(dtype: npt.DTypeLike | DTypeWrapper | dict[str, JSON]) -> DTypeWrapper: @@ -569,7 +569,7 @@ def resolve_dtype(dtype: npt.DTypeLike | DTypeWrapper | dict[str, JSON]) -> DTyp INTEGER_DTYPE = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 FLOAT_DTYPE = Float16 | Float32 | Float64 COMPLEX_DTYPE = Complex64 | Complex128 -STRING_DTYPE = StaticUnicodeString | VlenString | StaticByteString +STRING_DTYPE = StaticUnicodeString | VariableLengthString | StaticByteString for dtype in get_args( Bool | INTEGER_DTYPE | FLOAT_DTYPE | COMPLEX_DTYPE | STRING_DTYPE | StaticRawBytes ): diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 823944e067..8012aac02d 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -10,6 +10,8 @@ import numcodecs.abc from zarr.abc.metadata import Metadata +from zarr.core.metadata.dtype import DTypeWrapper +from zarr.registry import get_data_type_from_numpy if TYPE_CHECKING: from typing import Any, Literal, Self @@ -46,7 +48,7 @@ class ArrayV2MetadataDict(TypedDict): class ArrayV2Metadata(Metadata): shape: ChunkCoords chunks: ChunkCoords - dtype: np.dtype[Any] + dtype: DTypeWrapper[Any, Any] fill_value: int | float | str | bytes | None = 0 order: MemoryOrder = "C" filters: tuple[numcodecs.abc.Codec, ...] | None = None @@ -59,7 +61,7 @@ def __init__( self, *, shape: ChunkCoords, - dtype: npt.DTypeLike, + dtype: DTypeWrapper[Any, Any], chunks: ChunkCoords, fill_value: Any, order: MemoryOrder, @@ -72,18 +74,17 @@ def __init__( Metadata for a Zarr format 2 array. 
""" shape_parsed = parse_shapelike(shape) - dtype_parsed = parse_dtype(dtype) chunks_parsed = parse_shapelike(chunks) compressor_parsed = parse_compressor(compressor) order_parsed = parse_indexing_order(order) dimension_separator_parsed = parse_separator(dimension_separator) filters_parsed = parse_filters(filters) - fill_value_parsed = parse_fill_value(fill_value, dtype=dtype_parsed) + fill_value_parsed = parse_fill_value(fill_value, dtype=dtype.unwrap()) attributes_parsed = parse_attributes(attributes) object.__setattr__(self, "shape", shape_parsed) - object.__setattr__(self, "dtype", dtype_parsed) + object.__setattr__(self, "dtype", dtype) object.__setattr__(self, "chunks", chunks_parsed) object.__setattr__(self, "compressor", compressor_parsed) object.__setattr__(self, "order", order_parsed) @@ -163,9 +164,9 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: _data = data.copy() # check that the zarr_format attribute is correct _ = parse_zarr_format(_data.pop("zarr_format")) - dtype = parse_dtype(_data["dtype"]) - - if dtype.kind in "SV": + dtype = get_data_type_from_numpy(parse_dtype(_data["dtype"])) + _data["dtype"] = dtype + if dtype.unwrap().kind in "SV": fill_value_encoded = _data.get("fill_value") if fill_value_encoded is not None: fill_value = base64.standard_b64decode(fill_value_encoded) @@ -205,12 +206,13 @@ def to_dict(self) -> dict[str, JSON]: _ = zarray_dict.pop("dtype") dtype_json: JSON + # TODO: Replace this with per-dtype method # In the case of zarr v2, the simplest i.e., '|VXX' dtype is represented as a string - dtype_descr = self.dtype.descr - if self.dtype.kind == "V" and dtype_descr[0][0] != "" and len(dtype_descr) != 0: - dtype_json = tuple(self.dtype.descr) + dtype_descr = self.dtype.unwrap().descr + if self.dtype.unwrap().kind == "V" and dtype_descr[0][0] != "" and len(dtype_descr) != 0: + dtype_json = tuple(self.dtype.unwrap().descr) else: - dtype_json = self.dtype.str + dtype_json = self.dtype.unwrap().str 
zarray_dict["dtype"] = dtype_json return zarray_dict @@ -377,42 +379,19 @@ def _default_fill_value(dtype: np.dtype[Any]) -> Any: def _default_compressor( - dtype: np.dtype[Any], + dtype: DTypeWrapper[Any, Any], ) -> dict[str, JSON] | None: """Get the default filters and compressor for a dtype. https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html """ default_compressor = config.get("array.v2_default_compressor") - if dtype.kind in "biufcmM": - dtype_key = "numeric" - elif dtype.kind in "U": - dtype_key = "string" - elif dtype.kind in "OSV": - dtype_key = "bytes" - else: - raise ValueError(f"Unsupported dtype kind {dtype.kind}") - - return cast(dict[str, JSON] | None, default_compressor.get(dtype_key, None)) + return cast(dict[str, JSON] | None, default_compressor.get(dtype.kind, None)) def _default_filters( - dtype: np.dtype[Any], + dtype: DTypeWrapper, ) -> list[dict[str, JSON]] | None: - """Get the default filters and compressor for a dtype. - - https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html - """ + """Get the default filters and compressor for a dtype.""" default_filters = config.get("array.v2_default_filters") - if dtype.kind in "biufcmM": - dtype_key = "numeric" - elif dtype.kind in "U": - dtype_key = "string" - elif dtype.kind in "OS": - dtype_key = "bytes" - elif dtype.kind == "V": - dtype_key = "raw" - else: - raise ValueError(f"Unsupported dtype kind {dtype.kind}") - - return cast(list[dict[str, JSON]] | None, default_filters.get(dtype_key, None)) + return cast(list[dict[str, JSON]] | None, default_filters.get(dtype.kind, None)) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 3966b0d72c..f70cbb3cf2 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -266,14 +266,14 @@ def __init__( chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = 
parse_dimension_names(dimension_names) - fill_value_parsed = data_type.to_dtype().type(fill_value) + fill_value_parsed = data_type.unwrap().type(fill_value) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) storage_transformers_parsed = parse_storage_transformers(storage_transformers) array_spec = ArraySpec( shape=shape_parsed, - dtype=data_type.to_dtype(), + dtype=data_type.unwrap(), fill_value=fill_value_parsed, config=ArrayConfig.from_dict({}), # TODO: config is not needed here. prototype=default_buffer_prototype(), # TODO: prototype is not needed here. @@ -308,13 +308,13 @@ def _validate_metadata(self) -> None: raise ValueError("`fill_value` is required.") for codec in self.codecs: codec.validate( - shape=self.shape, dtype=self.data_type.to_dtype(), chunk_grid=self.chunk_grid + shape=self.shape, dtype=self.data_type.unwrap(), chunk_grid=self.chunk_grid ) @property def dtype(self) -> np.dtype[Any]: """Interpret Zarr dtype as NumPy dtype""" - return self.data_type.to_dtype() + return self.data_type.unwrap() @property def ndim(self) -> int: diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 373e118e78..7760c599fd 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -331,7 +331,7 @@ def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper: __data_type_registry.lazy_load() for val in __data_type_registry.contents.values(): if val.dtype_cls is type(np_dtype): - return val.from_dtype(np_dtype) + return val.wrap(np_dtype) raise ValueError( f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__data_type_registry.contents)}." 
) diff --git a/tests/conftest.py b/tests/conftest.py index 04034cb5b8..fb7b7977a7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -259,7 +259,10 @@ def create_array_metadata( ) shard_shape_parsed, chunk_shape_parsed = _auto_partition( - array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, dtype=dtype_parsed + array_shape=shape_parsed, + shard_shape=shards, + chunk_shape=chunks, + dtype=dtype_parsed.unwrap().itemsize, ) if order is None: diff --git a/tests/test_array.py b/tests/test_array.py index ce149d0f9a..959cf02055 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -927,7 +927,10 @@ def test_auto_partition_auto_shards( expected_shards += (cs,) auto_shards, _ = _auto_partition( - array_shape=array_shape, chunk_shape=chunk_shape, shard_shape="auto", dtype=dtype + array_shape=array_shape, + chunk_shape=chunk_shape, + shard_shape="auto", + item_size=dtype.itemsize, ) assert auto_shards == expected_shards @@ -1079,7 +1082,10 @@ async def test_v3_chunk_encoding( compressors=compressors, ) filters_expected, _, compressors_expected = _parse_chunk_encoding_v3( - filters=filters, compressors=compressors, serializer="auto", dtype=np.dtype(dtype) + filters=filters, + compressors=compressors, + serializer="auto", + dtype=arr.metadata.data_type, ) assert arr.filters == filters_expected assert arr.compressors == compressors_expected @@ -1145,7 +1151,7 @@ async def test_default_filters_compressors( elif zarr_format == 2: default_filters, default_compressors = _get_default_chunk_encoding_v2( - np_dtype=np.dtype(dtype) + dtype=np.dtype(dtype) ) if default_filters is None: expected_filters = () diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index b5ca92c568..54e077f1a6 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -12,7 +12,7 @@ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.group import 
GroupMetadata, parse_node_type -from zarr.core.metadata.dtype import FlexibleWrapperBase, complex_from_json +from zarr.core.metadata.dtype import complex_from_json from zarr.core.metadata.v3 import ( ArrayV3Metadata, parse_dimension_names, @@ -54,13 +54,20 @@ ) complex_dtypes = ("complex64", "complex128") -flexible_dtypes = ("str", "bytes", 'void') +flexible_dtypes = ("str", "bytes", "void") if _NUMPY_SUPPORTS_VLEN_STRING: - vlen_string_dtypes = ("T","O") + vlen_string_dtypes = ("T", "O") else: - vlen_string_dtypes = ("O") - -dtypes = (*bool_dtypes, *int_dtypes, *float_dtypes, *complex_dtypes, *flexible_dtypes, *vlen_string_dtypes) + vlen_string_dtypes = "O" + +dtypes = ( + *bool_dtypes, + *int_dtypes, + *float_dtypes, + *complex_dtypes, + *flexible_dtypes, + *vlen_string_dtypes, +) @pytest.mark.parametrize("data", [None, 1, 2, 4, 5, "3"]) @@ -121,7 +128,7 @@ def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: """ zarr_format = 3 dtype = get_data_type_from_numpy(dtype_str) - expected = dtype.to_dtype().type(complex(*fill_value)) + expected = dtype.unwrap().type(complex(*fill_value)) observed = dtype.from_json_value(fill_value, zarr_format=zarr_format) assert observed == expected assert dtype.to_json_value(observed, zarr_format=zarr_format) == tuple(fill_value) @@ -330,7 +337,6 @@ async def test_special_float_fill_values(fill_value: str) -> None: @pytest.mark.parametrize("dtype_str", dtypes) def test_dtypes(dtype_str: str) -> None: dt = get_data_type_from_numpy(dtype_str) - np_dtype = dt.to_dtype() + np_dtype = dt.unwrap() assert isinstance(np_dtype, dt.dtype_cls) assert np_dtype.type(0) == dt.cast_value(0) - From 3c232a406264716fd14cb6a1dab9a91fc6a22632 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 3 Mar 2025 10:44:34 +0100 Subject: [PATCH 013/130] push into v2 --- src/zarr/api/asynchronous.py | 3 +-- src/zarr/core/array.py | 5 ++--- src/zarr/core/buffer/cpu.py | 4 ++-- src/zarr/core/common.py | 6 ------ 
src/zarr/core/metadata/v2.py | 4 +++- src/zarr/core/metadata/v3.py | 10 ++++++---- tests/conftest.py | 5 +++-- 7 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index d8462b72ef..f1131003fc 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -21,7 +21,6 @@ _default_zarr_format, _warn_order_kwarg, _warn_write_empty_chunks_kwarg, - parse_dtype, ) from zarr.core.group import ( AsyncGroup, @@ -980,7 +979,7 @@ async def create( _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) or _default_zarr_format() ) - dtype_wrapped = parse_dtype(dtype, zarr_format=zarr_format) + dtype_wrapped = get_data_type_from_numpy(dtype) if zarr_format == 2: if chunks is None: chunks = shape diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 3b1e6a973f..9abb330d59 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -58,7 +58,6 @@ _default_zarr_format, _warn_order_kwarg, concurrent_map, - parse_dtype, parse_order, parse_shapelike, product, @@ -1034,7 +1033,7 @@ def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec ) @property - def dtype(self) -> np.dtype[Any]: + def dtype(self) -> DTypeWrapper[Any, Any]: """Returns the data type of the array. 
Returns @@ -3918,7 +3917,7 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation - dtype_wrapped = parse_dtype(dtype, zarr_format=zarr_format) + dtype_wrapped = get_data_type_from_numpy(dtype) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index b83f710747..00444a6f76 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -151,7 +151,7 @@ def create( cls, *, shape: Iterable[int], - dtype: DTypeWrapper[Any, Any], + dtype: np.dtype[Any], order: Literal["C", "F"] = "C", fill_value: Any | None = None, ) -> Self: @@ -160,7 +160,7 @@ def create( else: return cls( np.full( - shape=tuple(shape), fill_value=fill_value, dtype=dtype.unwrap(), order=order + shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order ) ) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index 85dadc2b53..5543fa9086 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -166,12 +166,6 @@ def parse_bool(data: Any) -> bool: raise ValueError(f"Expected bool, got {data} instead.") -def parse_dtype(dtype: Any, zarr_format: ZarrFormat) -> DTypeWrapper[Any, Any]: - from zarr.registry import get_data_type_from_numpy - - return get_data_type_from_numpy(np.dtype(dtype)) - - def _warn_write_empty_chunks_kwarg() -> None: # TODO: link to docs page on array configuration in this message msg = ( diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 8012aac02d..2ba2ac5c45 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -75,7 +75,9 @@ def __init__( """ shape_parsed = parse_shapelike(shape) chunks_parsed = parse_shapelike(chunks) - + # TODO: remove this + if not isinstance(dtype, DTypeWrapper): + raise TypeError compressor_parsed = parse_compressor(compressor) order_parsed = 
parse_indexing_order(order) dimension_separator_parsed = parse_separator(dimension_separator) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index f70cbb3cf2..8bf20899c3 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -4,7 +4,7 @@ from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype - +from zarr.core.metadata.dtype import DTypeWrapper if TYPE_CHECKING: from collections.abc import Callable from typing import Self @@ -12,9 +12,7 @@ from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import JSON, ChunkCoords - from zarr.core.metadata.dtype import ( - DTypeWrapper, - ) + import json from collections.abc import Iterable @@ -262,6 +260,10 @@ def __init__( """ Because the class is a frozen dataclass, we set attributes using object.__setattr__ """ + + # TODO: remove this + if not isinstance(data_type, DTypeWrapper): + raise TypeError shape_parsed = parse_shapelike(shape) chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) diff --git a/tests/conftest.py b/tests/conftest.py index fb7b7977a7..a650accc51 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,11 +18,12 @@ _parse_chunk_key_encoding, ) from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition -from zarr.core.common import JSON, parse_dtype, parse_shapelike +from zarr.core.common import JSON, parse_shapelike from zarr.core.config import config as zarr_config from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync +from zarr.registry import get_data_type_from_numpy from zarr.storage import FsspecStore, LocalStore, MemoryStore, StorePath, ZipStore if TYPE_CHECKING: @@ -252,7 +253,7 @@ def create_array_metadata( """ Create array metadata """ - dtype_parsed = parse_dtype(dtype, 
zarr_format=zarr_format) + dtype_parsed = get_data_type_from_numpy(dtype) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format From b7fe98640a548e17f39fdf21e6b0a93186d4cabf Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 3 Mar 2025 15:03:01 +0100 Subject: [PATCH 014/130] remove endianness kwarg to methods, make it an instance variable instead --- src/zarr/core/metadata/dtype.py | 107 +++++++++++++------------------- 1 file changed, 42 insertions(+), 65 deletions(-) diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 5d382076b4..f88683e1e7 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from collections.abc import Sequence -from dataclasses import dataclass +from dataclasses import dataclass, replace from typing import Any, ClassVar, Generic, Literal, Self, TypeGuard, TypeVar, cast, get_args import numpy as np @@ -199,11 +199,13 @@ def complex_from_json( TScalar = TypeVar("TScalar", bound=np.generic) +@dataclass(frozen=True, kw_only=True) class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): name: ClassVar[str] dtype_cls: ClassVar[type[TDType]] # this class will create a numpy dtype kind: ClassVar[DataTypeFlavor] - default_value: TScalar + default_value: ClassVar[TScalar] + endianness: Endianness = "native" def __init_subclass__(cls) -> None: # Subclasses will bind the first generic type parameter to an attribute of the class @@ -215,18 +217,21 @@ def __init_subclass__(cls) -> None: def to_dict(self) -> dict[str, JSON]: return {"name": self.name} - def cast_value(self: Self, value: object, *, endianness: Endianness | None = None) -> TScalar: - return cast(np.generic, self.unwrap(endianness=endianness).type(value)) + def cast_value(self: Self, value: object) -> TScalar: + return cast(np.generic, self.unwrap().type(value)) @classmethod 
@abstractmethod def wrap(cls: type[Self], dtype: TDType) -> Self: raise NotImplementedError - def unwrap(self: Self, *, endianness: Endianness | None = None) -> TDType: - endian_str = endianness_to_numpy_str(endianness) + def unwrap(self: Self) -> TDType: + endian_str = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(endian_str) + def with_endianness(self: Self, endianness: Endianness) -> Self: + return replace(self, endianness=endianness) + @abstractmethod def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> JSON: """ @@ -235,9 +240,7 @@ def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> JSON: raise NotImplementedError @abstractmethod - def from_json_value( - self: Self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> TScalar: + def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: """ Read a JSON-serializable value as a numpy scalar """ @@ -257,11 +260,9 @@ def wrap(cls, dtype: np.dtypes.BoolDType) -> Self: def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> bool: return bool(data) - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.bool_: + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: if check_json_bool(data): - return self.unwrap(endianness=endianness).type(data) + return self.unwrap().type(data) raise TypeError(f"Invalid type: {data}. 
Expected a boolean.") @@ -275,11 +276,9 @@ def wrap(cls, dtype: TDType) -> Self: def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: return int(data) - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> TScalar: + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: if check_json_int(data): - return self.unwrap(endianness=endianness).type(data) + return self.unwrap().type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @@ -341,11 +340,9 @@ def wrap(cls, dtype: TDType) -> Self: def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> JSONFloat: return float_to_json(data, zarr_format) - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> TScalar: + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: if check_json_float_v2(data): - return self.unwrap(endianness=endianness).type(float_from_json(data, zarr_format)) + return self.unwrap().type(float_from_json(data, zarr_format)) raise TypeError(f"Invalid type: {data}. Expected a float.") @@ -382,13 +379,9 @@ def to_json_value( ) -> tuple[JSONFloat, JSONFloat]: return complex_to_json(data, zarr_format) - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.complex64: + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex64: if check_json_complex_float_v3(data): - return complex_from_json( - data, dtype=self.unwrap(endianness=endianness), zarr_format=zarr_format - ) + return complex_from_json(data, dtype=self.unwrap(), zarr_format=zarr_format) raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") @@ -407,13 +400,9 @@ def to_json_value( ) -> tuple[JSONFloat, JSONFloat]: return complex_to_json(data, zarr_format) - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.complex128: + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex128: if check_json_complex_float_v3(data): - return complex_from_json( - data, dtype=self.unwrap(endianness=endianness), zarr_format=zarr_format - ) + return complex_from_json(data, dtype=self.unwrap(), zarr_format=zarr_format) raise TypeError(f"Invalid type: {data}. Expected a complex float.") @@ -426,8 +415,8 @@ class FlexibleWrapperBase(DTypeWrapper[TDType, TScalar]): def wrap(cls, dtype: TDType) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) - def unwrap(self, endianness: Endianness | None = None) -> TDType: - endianness_code = endianness_to_numpy_str(endianness) + def unwrap(self) -> TDType: + endianness_code = endianness_to_numpy_str(self.endianness) return self.dtype_cls(self.length).newbyteorder(endianness_code) @@ -435,22 +424,18 @@ def unwrap(self, endianness: Endianness | None = None) -> TDType: class StaticByteString(FlexibleWrapperBase[np.dtypes.BytesDType, np.bytes_]): name = "numpy/static_byte_string" kind = "string" - default_value = b"" + default_value = np.bytes_(0) item_size_bits = 8 def to_dict(self) -> dict[str, JSON]: return {"name": self.name, "configuration": {"length": self.length}} - def to_json_value( - self, data: np.generic, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> str: + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return data.tobytes().decode("ascii") - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.bytes_: + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: if check_json_bool(data): - return 
self.unwrap(endianness=endianness).type(data.encode("ascii")) + return self.unwrap().type(data.encode("ascii")) raise TypeError(f"Invalid type: {data}. Expected a string.") @@ -464,20 +449,18 @@ class StaticRawBytes(FlexibleWrapperBase[np.dtypes.VoidDType, np.void]): def to_dict(self) -> dict[str, JSON]: return {"name": f"r{self.length * self.item_size_bits}"} - def unwrap(self, endianness: Endianness | None = None) -> np.dtypes.VoidDType: + def unwrap(self) -> np.dtypes.VoidDType: # this needs to be overridden because numpy does not allow creating a void type # by invoking np.dtypes.VoidDType directly - endianness_code = endianness_to_numpy_str(endianness) + endianness_code = endianness_to_numpy_str(self.endianness) return np.dtype(f"{endianness_code}V{self.length}") def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> tuple[int, ...]: return tuple(*data.tobytes()) - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.void: + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: # todo: check that this is well-formed - return self.unwrap(endianness=endianness).type(bytes(data)) + return self.unwrap().type(bytes(data)) @dataclass(frozen=True, kw_only=True) @@ -493,12 +476,10 @@ def to_dict(self) -> dict[str, JSON]: def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return str(data) - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> np.str_: + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: if not check_json_str(data): raise TypeError(f"Invalid type: {data}. 
Expected a string.") - return self.unwrap(endianness=endianness).type(data) + return self.unwrap().type(data) if _NUMPY_SUPPORTS_VLEN_STRING: @@ -516,17 +497,15 @@ def wrap(cls, dtype: np.dtypes.StringDType) -> Self: def to_dict(self) -> dict[str, JSON]: return {"name": self.name} - def unwrap(self, endianness: Endianness | None = None) -> np.dtypes.StringDType: - endianness_code = endianness_to_numpy_str(endianness) + def unwrap(self) -> np.dtypes.StringDType: + endianness_code = endianness_to_numpy_str(self.endianness) return np.dtype(endianness_code + self.numpy_character_code) def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return str(data) - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> str: - return self.unwrap(endianness=endianness).type(data) + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + return self.unwrap().type(data) else: @@ -543,16 +522,14 @@ def to_dict(self) -> dict[str, JSON]: def wrap(cls, dtype: np.dtypes.ObjectDType) -> Self: return cls() - def unwrap(self, endianness: Endianness | None = None) -> np.dtype[np.dtypes.ObjectDType]: - return super().unwrap(endianness=endianness) + def unwrap(self) -> np.dtype[np.dtypes.ObjectDType]: + return super().unwrap() def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return str(data) - def from_json_value( - self, data: JSON, *, zarr_format: ZarrFormat, endianness: Endianness | None = None - ) -> str: - return self.unwrap(endianness=endianness).type(data) + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + return self.unwrap().type(data) def resolve_dtype(dtype: npt.DTypeLike | DTypeWrapper | dict[str, JSON]) -> DTypeWrapper: From d9b44b4cb5973714b23cf2ae25727235f5de8e0b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 4 Mar 2025 18:10:20 +0100 Subject: [PATCH 015/130] make wrapping safe by default --- 
src/zarr/api/asynchronous.py | 20 ++- src/zarr/codecs/blosc.py | 8 +- src/zarr/codecs/bytes.py | 8 +- src/zarr/codecs/sharding.py | 10 +- src/zarr/core/array.py | 181 ++++++++++--------- src/zarr/core/array_spec.py | 5 +- src/zarr/core/codec_pipeline.py | 5 +- src/zarr/core/metadata/dtype.py | 215 ++++++++++++++++++++--- src/zarr/core/metadata/v2.py | 50 ++---- src/zarr/core/metadata/v3.py | 37 ++-- src/zarr/registry.py | 70 +------- tests/conftest.py | 12 +- tests/test_array.py | 83 ++------- tests/test_codecs/test_vlen.py | 44 +---- tests/test_group.py | 2 +- tests/test_metadata/test_consolidated.py | 3 +- tests/test_metadata/test_v2.py | 10 +- tests/test_metadata/test_v3.py | 9 +- tests/test_v2.py | 77 ++------ 19 files changed, 405 insertions(+), 444 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index f1131003fc..d882b1d7cc 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -9,7 +9,13 @@ import numpy.typing as npt from typing_extensions import deprecated -from zarr.core.array import Array, AsyncArray, create_array, get_array_metadata +from zarr.core.array import ( + Array, + AsyncArray, + _get_default_chunk_encoding_v2, + create_array, + get_array_metadata, +) from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams from zarr.core.buffer import NDArrayLike from zarr.core.common import ( @@ -29,9 +35,8 @@ create_hierarchy, ) from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata -from zarr.core.metadata.v2 import _default_compressor, _default_filters +from zarr.core.metadata.dtype import get_data_type_from_numpy from zarr.errors import NodeTypeValidationError -from zarr.registry import get_data_type_from_numpy from zarr.storage._common import make_store_path if TYPE_CHECKING: @@ -983,10 +988,11 @@ async def create( if zarr_format == 2: if chunks is None: chunks = shape - if not filters: - filters = _default_filters(dtype_wrapped) - if not 
compressor: - compressor = _default_compressor(dtype_wrapped) + default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype_wrapped) + if filters is None: + filters = default_filters + if compressor is None: + compressor = default_compressor elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr] if chunks is not None: chunk_shape = chunks diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index 54a23c9c57..0db9e830f1 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -139,11 +139,15 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: dtype = array_spec.dtype new_codec = self if new_codec.typesize is None: - new_codec = replace(new_codec, typesize=dtype.itemsize) + new_codec = replace(new_codec, typesize=dtype.unwrap().itemsize) if new_codec.shuffle is None: new_codec = replace( new_codec, - shuffle=(BloscShuffle.bitshuffle if dtype.itemsize == 1 else BloscShuffle.shuffle), + shuffle=( + BloscShuffle.bitshuffle + if dtype.unwrap().itemsize == 1 + else BloscShuffle.shuffle + ), ) return new_codec diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 4875d8e8d8..1da497ea72 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -56,7 +56,7 @@ def to_dict(self) -> dict[str, JSON]: return {"name": "bytes", "configuration": {"endian": self.endian.value}} def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: - if array_spec.dtype.unwrap().itemsize == 0: + if array_spec.dtype.unwrap().itemsize == 1: if self.endian is not None: return replace(self, endian=None) elif self.endian is None: @@ -71,14 +71,14 @@ async def _decode_single( chunk_spec: ArraySpec, ) -> NDBuffer: assert isinstance(chunk_bytes, Buffer) - if chunk_spec.dtype.itemsize > 0: + if chunk_spec.dtype.unwrap().itemsize > 0: if self.endian == Endian.little: prefix = "<" else: prefix = ">" - dtype = np.dtype(f"{prefix}{chunk_spec.dtype.str[1:]}") + dtype = 
np.dtype(f"{prefix}{chunk_spec.dtype.unwrap().str[1:]}") else: - dtype = np.dtype(f"|{chunk_spec.dtype.str[1:]}") + dtype = np.dtype(f"|{chunk_spec.dtype.unwrap().str[1:]}") as_array_like = chunk_bytes.as_array_like() if isinstance(as_array_like, NDArrayLike): diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 09ceb538d0..7163a5fd7f 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -50,6 +50,7 @@ get_indexer, morton_order_iter, ) +from zarr.core.metadata.dtype import DTypeWrapper from zarr.core.metadata.v3 import parse_codecs from zarr.registry import get_ndbuffer_class, get_pipeline_class, register_codec @@ -403,7 +404,9 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: return replace(self, codecs=evolved_codecs) return self - def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + def validate( + self, *, shape: ChunkCoords, dtype: DTypeWrapper[Any, Any], chunk_grid: ChunkGrid + ) -> None: if len(self.chunk_shape) != len(shape): raise ValueError( "The shard's `chunk_shape` and array's `shape` need to have the same number of dimensions." 
@@ -484,7 +487,10 @@ async def _decode_partial_single( # setup output array out = shard_spec.prototype.nd_buffer.create( - shape=indexer.shape, dtype=shard_spec.dtype, order=shard_spec.order, fill_value=0 + shape=indexer.shape, + dtype=shard_spec.dtype.unwrap(), + order=shard_spec.order, + fill_value=0, ) indexed_chunks = list(indexer) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 9abb330d59..f8c6fced9f 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -5,6 +5,7 @@ from asyncio import gather from collections.abc import Iterable from dataclasses import dataclass, field, replace +from functools import cached_property from itertools import starmap from logging import getLogger from typing import ( @@ -29,8 +30,11 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.abc.store import Store, set_or_delete from zarr.codecs._v2 import V2Codec +from zarr.codecs.bytes import BytesCodec +from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec +from zarr.codecs.zstd import ZstdCodec from zarr.core._info import ArrayInfo -from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, parse_array_config +from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArraySpec, parse_array_config from zarr.core.attributes import Attributes from zarr.core.buffer import ( BufferPrototype, @@ -97,10 +101,13 @@ ArrayV3MetadataDict, T_ArrayMetadata, ) -from zarr.core.metadata.dtype import DTypeWrapper, VariableLengthString +from zarr.core.metadata.dtype import ( + DTypeWrapper, + StaticByteString, + VariableLengthString, + get_data_type_from_numpy, +) from zarr.core.metadata.v2 import ( - _default_compressor, - _default_filters, parse_compressor, parse_filters, ) @@ -111,7 +118,6 @@ _parse_array_array_codec, _parse_array_bytes_codec, _parse_bytes_bytes_codec, - get_data_type_from_numpy, get_pipeline_class, ) from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path @@ 
-548,7 +554,7 @@ async def _create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike[Any], + dtype: npt.DTypeLike[Any] | DTypeWrapper[Any, Any], zarr_format: ZarrFormat = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -682,14 +688,19 @@ def _create_metadata_v3( """ shape = parse_shapelike(shape) - codecs = list(codecs) if codecs is not None else _get_default_codecs(dtype) + if codecs is None: + filters, serializer, compressors = _get_default_chunk_encoding_v3(dtype) + codecs_parsed = (*filters, serializer, *compressors) + else: + codecs_parsed = tuple(codecs) + chunk_key_encoding_parsed: ChunkKeyEncodingLike if chunk_key_encoding is None: chunk_key_encoding_parsed = {"name": "default", "separator": "/"} else: chunk_key_encoding_parsed = chunk_key_encoding - if dtype.kind in "UTS": + if dtype.unwrap().kind in ("U", "T", "S"): warn( f"The dtype `{dtype}` is currently not part in the Zarr format 3 specification. It " "may not be supported by other zarr implementations and may change in the future.", @@ -710,7 +721,7 @@ def _create_metadata_v3( chunk_grid=chunk_grid_parsed, chunk_key_encoding=chunk_key_encoding_parsed, fill_value=fill_value_parsed, - codecs=codecs, + codecs=codecs_parsed, dimension_names=tuple(dimension_names) if dimension_names else None, attributes=attributes or {}, ) @@ -721,7 +732,7 @@ async def _create_v3( store_path: StorePath, *, shape: ShapeLike, - dtype: np.dtype[Any], + dtype: DTypeWrapper[Any, Any], chunk_shape: ChunkCoords, config: ArrayConfig, fill_value: Any | None = None, @@ -781,14 +792,6 @@ def _create_metadata_v2( if dimension_separator is None: dimension_separator = "." 
- # inject VLenUTF8 for str dtype if not already present - if isinstance(dtype, VariableLengthString): - filters = filters or [] - from numcodecs.vlen import VLenUTF8 - - if not any(isinstance(x, VLenUTF8) or x["id"] == "vlen-utf8" for x in filters): - filters = list(filters) + [VLenUTF8()] - return ArrayV2Metadata( shape=shape, dtype=dtype, @@ -807,7 +810,7 @@ async def _create_v2( store_path: StorePath, *, shape: ChunkCoords, - dtype: np.dtype[Any], + dtype: DTypeWrapper[Any, Any], chunks: ChunkCoords, order: MemoryOrder, config: ArrayConfig, @@ -949,6 +952,13 @@ def chunks(self) -> ChunkCoords: """ return self.metadata.chunks + @cached_property + def chunk_grid(self) -> RegularChunkGrid: + if self.metadata.zarr_format == 2: + return RegularChunkGrid(chunk_shape=self.chunks) + else: + return self.metadata.chunk_grid + @property def shards(self) -> ChunkCoords | None: """Returns the shard shape of the Array. @@ -1033,7 +1043,7 @@ def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec ) @property - def dtype(self) -> DTypeWrapper[Any, Any]: + def dtype(self) -> np.dtype[Any]: """Returns the data type of the array. 
Returns @@ -1041,7 +1051,10 @@ def dtype(self) -> DTypeWrapper[Any, Any]: np.dtype Data type of the array """ - return self.metadata.dtype + if self.metadata.zarr_format == 2: + return self.metadata.dtype.unwrap() + else: + return self.metadata.data_type.unwrap() @property def order(self) -> MemoryOrder: @@ -1259,6 +1272,20 @@ def nbytes(self) -> int: """ return self.size * self.dtype.itemsize + def get_chunk_spec( + self, _chunk_coords: ChunkCoords, array_config: ArrayConfig, prototype: BufferPrototype + ) -> ArraySpec: + assert isinstance(self.chunk_grid, RegularChunkGrid), ( + "Currently, only regular chunk grid is supported" + ) + return ArraySpec( + shape=self.chunk_grid.chunk_shape, + dtype=self.dtype, + fill_value=self.metadata.fill_value, + config=array_config, + prototype=prototype, + ) + async def _get_selection( self, indexer: Indexer, @@ -1298,7 +1325,7 @@ async def _get_selection( [ ( self.store_path / self.metadata.encode_chunk_key(chunk_coords), - self.metadata.get_chunk_spec(chunk_coords, _config, prototype=prototype), + self.get_chunk_spec(chunk_coords, _config, prototype=prototype), chunk_selection, out_selection, is_complete_chunk, @@ -1351,7 +1378,7 @@ async def getitem( indexer = BasicIndexer( selection, shape=self.metadata.shape, - chunk_grid=self.metadata.chunk_grid, + chunk_grid=self.chunk_grid, ) return await self._get_selection(indexer, prototype=prototype) @@ -1397,19 +1424,19 @@ async def _set_selection( # TODO: need to handle array types that don't support __array_function__ # like PyTorch and JAX array_like_ = cast(np._typing._SupportsArrayFunc, array_like) - value = np.asanyarray(value, dtype=self.metadata.dtype, like=array_like_) + value = np.asanyarray(value, dtype=self.dtype, like=array_like_) else: if not hasattr(value, "shape"): - value = np.asarray(value, self.metadata.dtype) + value = np.asarray(value, self.dtype) # assert ( # value.shape == indexer.shape # ), f"shape of value doesn't match indexer shape. 
Expected {indexer.shape}, got {value.shape}" - if not hasattr(value, "dtype") or value.dtype.name != self.metadata.dtype.name: + if not hasattr(value, "dtype") or value.dtype.name != self.dtype.name: if hasattr(value, "astype"): # Handle things that are already NDArrayLike more efficiently - value = value.astype(dtype=self.metadata.dtype, order="A") + value = value.astype(dtype=self.dtype, order="A") else: - value = np.array(value, dtype=self.metadata.dtype, order="A") + value = np.array(value, dtype=self.dtype, order="A") value = cast(NDArrayLike, value) # We accept any ndarray like object from the user and convert it # to a NDBuffer (or subclass). From this point onwards, we only pass @@ -1426,7 +1453,7 @@ async def _set_selection( [ ( self.store_path / self.metadata.encode_chunk_key(chunk_coords), - self.metadata.get_chunk_spec(chunk_coords, _config, prototype), + self.get_chunk_spec(chunk_coords, _config, prototype), chunk_selection, out_selection, is_complete_chunk, @@ -1481,7 +1508,7 @@ async def setitem( indexer = BasicIndexer( selection, shape=self.metadata.shape, - chunk_grid=self.metadata.chunk_grid, + chunk_grid=self.chunk_grid, ) return await self._set_selection(indexer, value, prototype=prototype) @@ -1518,8 +1545,8 @@ async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) if delete_outside_chunks: # Remove all chunks outside of the new shape - old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape)) - new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape)) + old_chunk_coords = set(self.chunk_grid.all_chunk_coords(self.metadata.shape)) + new_chunk_coords = set(self.chunk_grid.all_chunk_coords(new_shape)) async def _delete_key(key: str) -> None: await (self.store_path / key).delete() @@ -1692,15 +1719,9 @@ async def info_complete(self) -> Any: def _info( self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None ) -> Any: - _data_type: 
np.dtype[Any] | DTypeWrapper - if isinstance(self.metadata, ArrayV2Metadata): - _data_type = self.metadata.dtype - else: - _data_type = self.metadata.data_type - return ArrayInfo( _zarr_format=self.metadata.zarr_format, - _data_type=_data_type, + _data_type=self.dtype, _shape=self.shape, _order=self.order, _shard_shape=self.shards, @@ -2044,7 +2065,7 @@ def dtype(self) -> np.dtype[Any]: np.dtype The NumPy data type. """ - return self._async_array.dtype.unwrap() + return self._async_array.dtype @property def attrs(self) -> Attributes: @@ -2654,7 +2675,7 @@ def get_basic_selection( prototype = default_buffer_prototype() return sync( self._async_array._get_selection( - BasicIndexer(selection, self.shape, self.metadata.chunk_grid), + BasicIndexer(selection, self.shape, self._async_array.chunk_grid), out=out, fields=fields, prototype=prototype, @@ -2754,7 +2775,7 @@ def set_basic_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = BasicIndexer(selection, self.shape, self.metadata.chunk_grid) + indexer = BasicIndexer(selection, self.shape, self._async_array.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @_deprecate_positional_args @@ -2875,7 +2896,7 @@ def get_orthogonal_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) + indexer = OrthogonalIndexer(selection, self.shape, self._async_array.chunk_grid) return sync( self._async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype @@ -2988,7 +3009,7 @@ def set_orthogonal_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) + indexer = OrthogonalIndexer(selection, self.shape, self._async_array.chunk_grid) return sync( self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype) 
) @@ -3069,7 +3090,7 @@ def get_mask_selection( if prototype is None: prototype = default_buffer_prototype() - indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) + indexer = MaskIndexer(mask, self.shape, self._async_array.chunk_grid) return sync( self._async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype @@ -3152,7 +3173,7 @@ def set_mask_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) + indexer = MaskIndexer(mask, self.shape, self._async_array.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @_deprecate_positional_args @@ -3233,7 +3254,7 @@ def get_coordinate_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) + indexer = CoordinateIndexer(selection, self.shape, self._async_array.chunk_grid) out_array = sync( self._async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype @@ -3319,7 +3340,7 @@ def set_coordinate_selection( if prototype is None: prototype = default_buffer_prototype() # setup indexer - indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) + indexer = CoordinateIndexer(selection, self.shape, self._async_array.chunk_grid) # handle value - need ndarray-like flatten value if not is_scalar(value, self.dtype): @@ -3435,7 +3456,7 @@ def get_block_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid) + indexer = BlockIndexer(selection, self.shape, self._async_array.chunk_grid) return sync( self._async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype @@ -3529,7 +3550,7 @@ def set_block_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = 
BlockIndexer(selection, self.shape, self.metadata.chunk_grid) + indexer = BlockIndexer(selection, self.shape, self._async_array.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @property @@ -3771,13 +3792,6 @@ def _build_parents( return parents -def _get_default_codecs( - np_dtype: np.dtype[Any], -) -> tuple[Codec, ...]: - filters, serializer, compressors = _get_default_chunk_encoding_v3(np_dtype) - return filters + (serializer,) + compressors - - FiltersLike: TypeAlias = ( Iterable[dict[str, JSON] | ArrayArrayCodec | numcodecs.abc.Codec] | ArrayArrayCodec @@ -3917,7 +3931,10 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation - dtype_wrapped = get_data_type_from_numpy(dtype) + if not isinstance(dtype, DTypeWrapper): + dtype_wrapped = get_data_type_from_numpy(dtype) + else: + dtype_wrapped = dtype shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format @@ -4178,7 +4195,7 @@ async def create_array( if write_data is True and data_parsed is not None: await result._set_selection( - BasicIndexer(..., shape=result.shape, chunk_grid=result.metadata.chunk_grid), + BasicIndexer(..., shape=result.shape, chunk_grid=result.chunk_grid), data_parsed, prototype=default_buffer_prototype(), ) @@ -4215,15 +4232,20 @@ def _get_default_chunk_encoding_v3( """ Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype. 
""" - - default_filters = zarr_config.get("array.v3_default_filters").get(dtype.kind) - default_serializer = zarr_config.get("array.v3_default_serializer").get(dtype.kind) - default_compressors = zarr_config.get("array.v3_default_compressors").get(dtype.kind) - - filters = tuple(_parse_array_array_codec(codec_dict) for codec_dict in default_filters) - serializer = _parse_array_bytes_codec(default_serializer) - compressors = tuple(_parse_bytes_bytes_codec(codec_dict) for codec_dict in default_compressors) - + filters = () + compressors = (ZstdCodec(level=0, checksum=False),) + # TODO: find a registry-style solution for this that isn't bloated + # We need to associate specific dtypes with specific encoding schemes + + if isinstance(dtype, VariableLengthString): + serializer = VLenUTF8Codec() + elif isinstance(dtype, StaticByteString): + serializer = VLenBytesCodec() + else: + if dtype.unwrap().itemsize == 1: + serializer = BytesCodec(endian=None) + else: + serializer = BytesCodec() return filters, serializer, compressors @@ -4233,17 +4255,18 @@ def _get_default_chunk_encoding_v2( """ Get the default chunk encoding for Zarr format 2 arrays, given a dtype """ + from numcodecs import VLenBytes as numcodecs_VLenBytes + from numcodecs import VLenUTF8 as numcodecs_VLenUTF8 + from numcodecs import Zstd as numcodecs_zstd + + if isinstance(dtype, VariableLengthString): + filters = (numcodecs_VLenUTF8(),) + elif isinstance(dtype, StaticByteString): + filters = (numcodecs_VLenBytes(),) + else: + filters = None - compressor_dict = _default_compressor(dtype) - filter_dicts = _default_filters(dtype) - - compressor = None - if compressor_dict is not None: - compressor = numcodecs.get_codec(compressor_dict) - - filters = None - if filter_dicts is not None: - filters = tuple(numcodecs.get_codec(f) for f in filter_dicts) + compressor = numcodecs_zstd(level=0, checksum=False) return filters, compressor @@ -4296,7 +4319,7 @@ def _parse_chunk_encoding_v3( compressors: CompressorsLike, 
filters: FiltersLike, serializer: SerializerLike, - dtype: np.dtype[Any], + dtype: DTypeWrapper[Any, Any], ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: """ Generate chunk encoding classes for v3 arrays with optional defaults. diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index cf92f11050..f5a060cf95 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -11,8 +11,7 @@ parse_shapelike, ) from zarr.core.config import config as zarr_config -from zarr.core.metadata.dtype import DTypeWrapper -from zarr.registry import get_data_type_from_numpy +from zarr.core.metadata.dtype import DTypeWrapper, get_data_type_from_numpy if TYPE_CHECKING: from typing import NotRequired @@ -100,7 +99,7 @@ class ArraySpec: def __init__( self, shape: ChunkCoords, - dtype: npt.DtypeLike | DTypeWrapper[Any, Any], + dtype: npt.DTypeLike | DTypeWrapper[Any, Any], fill_value: Any, config: ArrayConfig, prototype: BufferPrototype, diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index 628a7e0487..315dbb77a9 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -17,7 +17,6 @@ from zarr.core.common import ChunkCoords, concurrent_map from zarr.core.config import config from zarr.core.indexing import SelectorTuple, is_scalar -from zarr.core.metadata.v2 import _default_fill_value from zarr.registry import register_pipeline if TYPE_CHECKING: @@ -64,7 +63,7 @@ def fill_value_or_default(chunk_spec: ArraySpec) -> Any: # validated when decoding the metadata, but we support reading # Zarr V2 data and need to support the case where fill_value # is None. 
- return _default_fill_value(dtype=chunk_spec.dtype) + return chunk_spec.dtype.default_value else: return fill_value @@ -317,7 +316,7 @@ def _merge_chunk_array( if existing_chunk_array is None: chunk_array = chunk_spec.prototype.nd_buffer.create( shape=chunk_spec.shape, - dtype=chunk_spec.dtype, + dtype=chunk_spec.dtype.unwrap(), order=chunk_spec.order, fill_value=fill_value_or_default(chunk_spec), ) diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index f88683e1e7..a573794730 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -1,16 +1,32 @@ +from __future__ import annotations + +import base64 from abc import ABC, abstractmethod from collections.abc import Sequence -from dataclasses import dataclass, replace -from typing import Any, ClassVar, Generic, Literal, Self, TypeGuard, TypeVar, cast, get_args +from dataclasses import dataclass, field, replace +from importlib.metadata import EntryPoint +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Generic, + Literal, + Self, + TypeGuard, + TypeVar, + cast, + get_args, +) import numpy as np import numpy.typing as npt from typing_extensions import get_original_bases from zarr.abc.metadata import Metadata -from zarr.core.common import JSON, ZarrFormat from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING -from zarr.registry import register_data_type + +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat Endianness = Literal["little", "big", "native"] DataTypeFlavor = Literal["boolean", "numeric", "string", "bytes"] @@ -132,16 +148,16 @@ def float_to_json(data: float | np.floating[Any], zarr_format: ZarrFormat) -> JS raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") -def complex_to_json_v2(data: complex | np.complexfloating) -> JSONFloat: - return float_to_json_v2(data) +def complex_to_json_v2(data: complex | np.complexfloating[Any]) -> tuple[JSONFloat, JSONFloat]: + return float_to_json_v2(data.real), float_to_json_v2(data.imag) -def complex_to_json_v3(data: complex | np.complexfloating) -> tuple[JSONFloat, JSONFloat]: +def complex_to_json_v3(data: complex | np.complexfloating[Any]) -> tuple[JSONFloat, JSONFloat]: return float_to_json_v3(data.real), float_to_json_v3(data.imag) def complex_to_json( - data: complex | np.complexfloating, zarr_format: ZarrFormat + data: complex | np.complexfloating[Any], zarr_format: ZarrFormat ) -> tuple[JSONFloat, JSONFloat] | JSONFloat: if zarr_format == 2: return complex_to_json_v2(data) @@ -150,6 +166,18 @@ def complex_to_json( raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") +def structured_scalar_to_json(data: bytes, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return base64.b64encode(data).decode("ascii") + raise NotImplementedError(f"Invalid zarr format: {zarr_format}. Expected 2.") + + +def structured_scalar_from_json(data: JSON, zarr_format: ZarrFormat) -> bytes: + if zarr_format == 2: + return base64.b64decode(data.encode("ascii")) + raise NotImplementedError(f"Invalid zarr format: {zarr_format}. 
Expected 2.") + + def float_from_json_v2(data: JSONFloat) -> float: match data: case "NaN": @@ -196,7 +224,7 @@ def complex_from_json( TDType = TypeVar("TDType", bound=np.dtype[Any]) -TScalar = TypeVar("TScalar", bound=np.generic) +TScalar = TypeVar("TScalar", bound=np.generic | str) @dataclass(frozen=True, kw_only=True) @@ -205,7 +233,7 @@ class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): dtype_cls: ClassVar[type[TDType]] # this class will create a numpy dtype kind: ClassVar[DataTypeFlavor] default_value: ClassVar[TScalar] - endianness: Endianness = "native" + endianness: Endianness | None = "native" def __init_subclass__(cls) -> None: # Subclasses will bind the first generic type parameter to an attribute of the class @@ -221,8 +249,21 @@ def cast_value(self: Self, value: object) -> TScalar: return cast(np.generic, self.unwrap().type(value)) @classmethod - @abstractmethod + def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[TDType]: + """ + Check that a dtype matches the dtype_cls class attribute + """ + return type(dtype) is cls.dtype_cls + + @classmethod def wrap(cls: type[Self], dtype: TDType) -> Self: + if cls.check_dtype(dtype): + return cls._wrap_unsafe(dtype) + raise TypeError(f"Invalid dtype: {dtype}. 
Expected an instance of {cls.dtype_cls}.") + + @classmethod + @abstractmethod + def _wrap_unsafe(cls: type[Self], dtype: TDType) -> Self: raise NotImplementedError def unwrap(self: Self) -> TDType: @@ -254,7 +295,7 @@ class Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): default_value = np.False_ @classmethod - def wrap(cls, dtype: np.dtypes.BoolDType) -> Self: + def _wrap_unsafe(cls, dtype: np.dtypes.BoolDType) -> Self: return cls() def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> bool: @@ -270,7 +311,7 @@ class IntWrapperBase(DTypeWrapper[TDType, TScalar]): kind = "numeric" @classmethod - def wrap(cls, dtype: TDType) -> Self: + def _wrap_unsafe(cls, dtype: TDType) -> Self: return cls() def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: @@ -334,7 +375,7 @@ class FloatWrapperBase(DTypeWrapper[TDType, TScalar]): kind = "numeric" @classmethod - def wrap(cls, dtype: TDType) -> Self: + def _wrap_unsafe(cls, dtype: TDType) -> Self: return cls() def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> JSONFloat: @@ -371,7 +412,7 @@ class Complex64(DTypeWrapper[np.dtypes.Complex64DType, np.complex64]): default_value = np.complex64(0) @classmethod - def wrap(cls, dtype: np.dtypes.Complex64DType) -> Self: + def _wrap_unsafe(cls, dtype: np.dtypes.Complex64DType) -> Self: return cls() def to_json_value( @@ -392,7 +433,7 @@ class Complex128(DTypeWrapper[np.dtypes.Complex128DType, np.complex128]): default_value = np.complex128(0) @classmethod - def wrap(cls, dtype: np.dtypes.Complex128DType) -> Self: + def _wrap_unsafe(cls, dtype: np.dtypes.Complex128DType) -> Self: return cls() def to_json_value( @@ -412,7 +453,7 @@ class FlexibleWrapperBase(DTypeWrapper[TDType, TScalar]): length: int @classmethod - def wrap(cls, dtype: TDType) -> Self: + def _wrap_unsafe(cls, dtype: TDType) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) def unwrap(self) -> TDType: @@ -431,10 +472,10 @@ def to_dict(self) -> 
dict[str, JSON]: return {"name": self.name, "configuration": {"length": self.length}} def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: - return data.tobytes().decode("ascii") + return base64.standard_b64encode(data).decode("ascii") def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: - if check_json_bool(data): + if check_json_str(data): return self.unwrap().type(data.encode("ascii")) raise TypeError(f"Invalid type: {data}. Expected a string.") @@ -456,7 +497,7 @@ def unwrap(self) -> np.dtypes.VoidDType: return np.dtype(f"{endianness_code}V{self.length}") def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> tuple[int, ...]: - return tuple(*data.tobytes()) + return base64.standard_b64encode(data).decode("ascii") def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: # todo: check that this is well-formed @@ -491,20 +532,22 @@ class VariableLengthString(DTypeWrapper[np.dtypes.StringDType, str]): default_value = "" @classmethod - def wrap(cls, dtype: np.dtypes.StringDType) -> Self: + def _wrap_unsafe(cls, dtype: np.dtypes.StringDType) -> Self: return cls() def to_dict(self) -> dict[str, JSON]: return {"name": self.name} def unwrap(self) -> np.dtypes.StringDType: - endianness_code = endianness_to_numpy_str(self.endianness) - return np.dtype(endianness_code + self.numpy_character_code) + # StringDType does not have endianness, so we ignore it here + return self.dtype_cls() def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return str(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. 
Expected a string.") return self.unwrap().type(data) else: @@ -514,27 +557,96 @@ class VariableLengthString(DTypeWrapper[np.dtypes.ObjectDType, str]): name = "numpy/vlen_string" kind = "string" default_value = np.object_("") + endianness: Endianness = field(default=None) + + def __post_init__(self) -> None: + if self.endianness is not None: + raise ValueError("VariableLengthString does not support endianness.") def to_dict(self) -> dict[str, JSON]: return {"name": self.name} @classmethod - def wrap(cls, dtype: np.dtypes.ObjectDType) -> Self: + def _wrap_unsafe(cls, dtype: np.dtypes.ObjectDType) -> Self: return cls() - def unwrap(self) -> np.dtype[np.dtypes.ObjectDType]: + def unwrap(self) -> np.dtypes.ObjectDType: return super().unwrap() def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return str(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. Expected a string.") return self.unwrap().type(data) -def resolve_dtype(dtype: npt.DTypeLike | DTypeWrapper | dict[str, JSON]) -> DTypeWrapper: - from zarr.registry import get_data_type_from_dict, get_data_type_from_numpy +@dataclass(frozen=True, kw_only=True) +class StructuredDtype(DTypeWrapper[np.dtypes.VoidDType, np.void]): + name = "numpy/struct" + kind = "struct" + fields: tuple[tuple[str, DTypeWrapper[Any, Any], int], ...] 
+ + @classmethod + def check_dtype(cls, dtype: np.dtypes.DTypeLike) -> TypeGuard[np.dtypes.VoidDType]: + """ + Check that this dtype is a numpy structured dtype + """ + return super().check_dtype(dtype) and dtype.fields is not None + + @classmethod + def _wrap_unsafe(cls, dtype: np.dtypes.VoidDType) -> Self: + fields: list[tuple[str, DTypeWrapper[Any, Any], int]] = [] + + if dtype.fields is None: + raise ValueError("numpy dtype has no fields") + + for key, (dtype_instance, offset) in dtype.fields.items(): + dtype_wrapped = data_type_registry.match_dtype(dtype_instance) + fields.append((key, dtype_wrapped, offset)) + + return cls(fields=tuple(fields)) + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: + return structured_scalar_to_json(data.tobytes(), zarr_format) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. Expected a string.") + as_bytes = structured_scalar_from_json(data, zarr_format=zarr_format) + dtype = self.unwrap() + return np.array([as_bytes], dtype=dtype.str).view(dtype)[0] + + +def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper: + if dtype in (str, "str"): + if _NUMPY_SUPPORTS_VLEN_STRING: + np_dtype = np.dtype("T") + else: + np_dtype = np.dtype("O") + else: + np_dtype = np.dtype(dtype) + data_type_registry.lazy_load() + for val in data_type_registry.contents.values(): + return val.wrap(np_dtype) + raise ValueError( + f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(data_type_registry.contents)}." 
+ ) + + +def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeWrapper: + data_type_registry.lazy_load() + dtype_name = dtype["name"] + dtype_cls = data_type_registry.get(dtype_name) + if dtype_cls is None: + raise ValueError(f"No data type class matching name {dtype_name}") + return dtype_cls.from_dict(dtype.get("configuration", {})) + + +def resolve_dtype( + dtype: npt.DTypeLike | DTypeWrapper[Any, Any] | dict[str, JSON], +) -> DTypeWrapper[Any, Any]: if isinstance(dtype, DTypeWrapper): return dtype elif isinstance(dtype, dict): @@ -543,6 +655,55 @@ def resolve_dtype(dtype: npt.DTypeLike | DTypeWrapper | dict[str, JSON]) -> DTyp return get_data_type_from_numpy(dtype) +def get_data_type_by_name( + dtype: str, configuration: dict[str, JSON] | None = None +) -> DTypeWrapper[Any, Any]: + data_type_registry.lazy_load() + if configuration is None: + _configuration = {} + else: + _configuration = configuration + maybe_dtype_cls = data_type_registry.get(dtype) + if maybe_dtype_cls is None: + raise ValueError(f"No data type class matching name {dtype}") + return maybe_dtype_cls.from_dict(_configuration) + + +@dataclass(frozen=True, kw_only=True) +class DataTypeRegistry: + contents: dict[str, type[DTypeWrapper[Any, Any]]] = field(default_factory=dict, init=False) + lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) + + def lazy_load(self) -> None: + for e in self.lazy_load_list: + self.register(e.load()) + + self.lazy_load_list.clear() + + def register(self: Self, cls: type[DTypeWrapper[Any, Any]]) -> None: + # don't register the same dtype twice + if cls.name not in self.contents or self.contents[cls.name] != cls: + self.contents[cls.name] = cls + + def get(self, key: str) -> type[DTypeWrapper[Any, Any]]: + return self.contents[key] + + def match_dtype(self, dtype: npt.DTypeLike) -> DTypeWrapper[Any, Any]: + data_type_registry.lazy_load() + for val in data_type_registry.contents.values(): + try: + return val._wrap_unsafe(dtype) + except 
ValueError: + pass + raise ValueError(f"No data type wrapper found that matches {dtype}") + + +def register_data_type(cls: type[DTypeWrapper[Any, Any]]) -> None: + data_type_registry.register(cls) + + +data_type_registry = DataTypeRegistry() + INTEGER_DTYPE = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 FLOAT_DTYPE = Float16 | Float32 | Float64 COMPLEX_DTYPE = Complex64 | Complex128 diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 2ba2ac5c45..ebf174eff3 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -4,14 +4,17 @@ import warnings from collections.abc import Iterable from enum import Enum -from functools import cached_property from typing import TYPE_CHECKING, TypedDict, cast import numcodecs.abc from zarr.abc.metadata import Metadata -from zarr.core.metadata.dtype import DTypeWrapper -from zarr.registry import get_data_type_from_numpy +from zarr.core.metadata.dtype import ( + DTypeWrapper, + StaticByteString, + StaticRawBytes, + get_data_type_from_numpy, +) if TYPE_CHECKING: from typing import Any, Literal, Self @@ -28,7 +31,6 @@ import numpy as np from zarr.core.array_spec import ArrayConfig, ArraySpec -from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.chunk_key_encodings import parse_separator from zarr.core.common import JSON, ZARRAY_JSON, ZATTRS_JSON, MemoryOrder, parse_shapelike from zarr.core.config import config, parse_indexing_order @@ -102,10 +104,6 @@ def __init__( def ndim(self) -> int: return len(self.shape) - @cached_property - def chunk_grid(self) -> RegularChunkGrid: - return RegularChunkGrid(chunk_shape=self.chunks) - @property def shards(self) -> ChunkCoords | None: return None @@ -199,11 +197,14 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: def to_dict(self) -> dict[str, JSON]: zarray_dict = super().to_dict() - if self.dtype.kind in "SV" and self.fill_value is not None: + if ( + isinstance(self.dtype, StaticByteString | 
StaticRawBytes) + and self.fill_value is not None + ): # There's a relationship between self.dtype and self.fill_value # that mypy isn't aware of. The fact that we have S or V dtype here # means we should have a bytes-type fill_value. - fill_value = base64.standard_b64encode(cast(bytes, self.fill_value)).decode("ascii") + fill_value = self.dtype.to_json_value(self.fill_value, zarr_format=2) zarray_dict["fill_value"] = fill_value _ = zarray_dict.pop("dtype") @@ -351,35 +352,6 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any: return fill_value -def _default_fill_value(dtype: np.dtype[Any]) -> Any: - """ - Get the default fill value for a type. - - Notes - ----- - This differs from :func:`parse_fill_value`, which parses a fill value - stored in the Array metadata into an in-memory value. This only gives - the default fill value for some type. - - This is useful for reading Zarr format 2 arrays, which allow the fill - value to be unspecified. - """ - if dtype.kind == "S": - return b"" - elif dtype.kind in "UO": - return "" - elif dtype.kind in "Mm": - return dtype.type("nat") - elif dtype.kind == "V": - if dtype.fields is not None: - default = tuple(_default_fill_value(field[0]) for field in dtype.fields.values()) - return np.array([default], dtype=dtype) - else: - return np.zeros(1, dtype=dtype) - else: - return dtype.type(0) - - def _default_compressor( dtype: DTypeWrapper[Any, Any], ) -> dict[str, JSON] | None: diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 8bf20899c3..e285490bfd 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -4,7 +4,13 @@ from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.metadata.dtype import DTypeWrapper +from zarr.core.metadata.dtype import ( + DTypeWrapper, + VariableLengthString, + get_data_type_by_name, + get_data_type_from_dict, +) + if TYPE_CHECKING: from collections.abc import Callable from 
typing import Self @@ -12,7 +18,7 @@ from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import JSON, ChunkCoords - + import json from collections.abc import Iterable @@ -37,7 +43,7 @@ from zarr.core.config import config from zarr.core.metadata.common import parse_attributes from zarr.errors import MetadataValidationError, NodeTypeValidationError -from zarr.registry import get_codec_class, get_data_type_by_name, get_data_type_from_dict +from zarr.registry import get_codec_class DEFAULT_DTYPE = "float64" @@ -103,14 +109,10 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: DTypeWrapper) -> None: # we need to have special codecs if we are decoding vlen strings or bytestrings # TODO: use codec ID instead of class name codec_class_name = abc.__class__.__name__ - if dtype.kind == "string" and not codec_class_name == "VLenUTF8Codec": + if isinstance(dtype, VariableLengthString) and not codec_class_name == "VLenUTF8Codec": raise ValueError( f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`." ) - if dtype.kind == "bytes" and not codec_class_name == "VLenBytesCodec": - raise ValueError( - f"For bytes dtype, ArrayBytesCodec must be `VLenBytesCodec`, got `{codec_class_name}`." - ) def parse_dimension_names(data: object) -> tuple[str | None, ...] 
| None: @@ -313,11 +315,6 @@ def _validate_metadata(self) -> None: shape=self.shape, dtype=self.data_type.unwrap(), chunk_grid=self.chunk_grid ) - @property - def dtype(self) -> np.dtype[Any]: - """Interpret Zarr dtype as NumPy dtype""" - return self.data_type.unwrap() - @property def ndim(self) -> int: return len(self.shape) @@ -365,20 +362,6 @@ def inner_codecs(self) -> tuple[Codec, ...]: return self.codecs[0].codecs return self.codecs - def get_chunk_spec( - self, _chunk_coords: ChunkCoords, array_config: ArrayConfig, prototype: BufferPrototype - ) -> ArraySpec: - assert isinstance(self.chunk_grid, RegularChunkGrid), ( - "Currently, only regular chunk grid is supported" - ) - return ArraySpec( - shape=self.chunk_grid.chunk_shape, - dtype=self.dtype, - fill_value=self.fill_value, - config=array_config, - prototype=prototype, - ) - def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: return self.chunk_key_encoding.encode_chunk_key(chunk_coords) diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 7760c599fd..8830cdb1a9 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -2,19 +2,15 @@ import warnings from collections import defaultdict -from dataclasses import dataclass, field from importlib.metadata import entry_points as get_entry_points -from typing import TYPE_CHECKING, Any, Generic, Self, TypeVar - -import numpy as np +from typing import TYPE_CHECKING, Any, Generic, TypeVar from zarr.core.config import BadConfigError, config +from zarr.core.metadata.dtype import data_type_registry if TYPE_CHECKING: from importlib.metadata import EntryPoint - import numpy.typing as npt - from zarr.abc.codec import ( ArrayArrayCodec, ArrayBytesCodec, @@ -24,7 +20,6 @@ ) from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON - from zarr.core.metadata.dtype import DTypeWrapper __all__ = [ "Registry", @@ -56,31 +51,10 @@ def register(self, cls: type[T]) -> None: self[fully_qualified_name(cls)] = cls -@dataclass(frozen=True, 
kw_only=True) -class DataTypeRegistry: - contents: dict[str, type[DTypeWrapper]] = field(default_factory=dict, init=False) - lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) - - def lazy_load(self) -> None: - for e in self.lazy_load_list: - self.register(e.load()) - - self.lazy_load_list.clear() - - def register(self: Self, cls: type[DTypeWrapper]) -> None: - # don't register the same dtype twice - if cls.name not in self.contents or self.contents[cls.name] != cls: - self.contents[cls.name] = cls - - def get(self, key: str) -> type[DTypeWrapper]: - return self.contents[key] - - __codec_registries: dict[str, Registry[Codec]] = defaultdict(Registry) __pipeline_registry: Registry[CodecPipeline] = Registry() __buffer_registry: Registry[Buffer] = Registry() __ndbuffer_registry: Registry[NDBuffer] = Registry() -__data_type_registry = DataTypeRegistry() """ The registry module is responsible for managing implementations of codecs, @@ -117,8 +91,8 @@ def _collect_entrypoints() -> list[Registry[Any]]: __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr.ndbuffer")) __ndbuffer_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="ndbuffer")) - __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr.data_type")) - __data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="data_type")) + data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr.data_type")) + data_type_registry.lazy_load_list.extend(entry_points.select(group="zarr", name="data_type")) __pipeline_registry.lazy_load_list.extend(entry_points.select(group="zarr.codec_pipeline")) __pipeline_registry.lazy_load_list.extend( @@ -165,10 +139,6 @@ def register_buffer(cls: type[Buffer]) -> None: __buffer_registry.register(cls) -def register_data_type(cls: type[DTypeWrapper]) -> None: - __data_type_registry.register(cls) - - def get_codec_class(key: str, reload_config: bool = False) -> type[Codec]: if 
reload_config: _reload_config() @@ -305,36 +275,4 @@ def get_ndbuffer_class(reload_config: bool = False) -> type[NDBuffer]: ) -def get_data_type_by_name(dtype: str, configuration: dict[str, JSON] | None = None) -> DTypeWrapper: - __data_type_registry.lazy_load() - if configuration is None: - _configuration = {} - else: - _configuration = configuration - maybe_dtype_cls = __data_type_registry.get(dtype) - if maybe_dtype_cls is None: - raise ValueError(f"No data type class matching name {dtype}") - return maybe_dtype_cls.from_dict(_configuration) - - -def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeWrapper: - __data_type_registry.lazy_load() - dtype_name = dtype["name"] - dtype_cls = __data_type_registry.get(dtype_name) - if dtype_cls is None: - raise ValueError(f"No data type class matching name {dtype_name}") - return dtype_cls.from_dict(dtype.get("configuration", {})) - - -def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper: - np_dtype = np.dtype(dtype) - __data_type_registry.lazy_load() - for val in __data_type_registry.contents.values(): - if val.dtype_cls is type(np_dtype): - return val.wrap(np_dtype) - raise ValueError( - f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(__data_type_registry.contents)}." 
- ) - - _collect_entrypoints() diff --git a/tests/conftest.py b/tests/conftest.py index a650accc51..6ff1c4596f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,10 +20,10 @@ from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition from zarr.core.common import JSON, parse_shapelike from zarr.core.config import config as zarr_config +from zarr.core.metadata.dtype import get_data_type_from_numpy from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync -from zarr.registry import get_data_type_from_numpy from zarr.storage import FsspecStore, LocalStore, MemoryStore, StorePath, ZipStore if TYPE_CHECKING: @@ -243,7 +243,7 @@ def create_array_metadata( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any = 0, order: MemoryOrder | None = None, zarr_format: ZarrFormat, attributes: dict[str, JSON] | None = None, @@ -263,7 +263,7 @@ def create_array_metadata( array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, - dtype=dtype_parsed.unwrap().itemsize, + item_size=dtype_parsed.unwrap().itemsize, ) if order is None: @@ -274,11 +274,11 @@ def create_array_metadata( if zarr_format == 2: filters_parsed, compressor_parsed = _parse_chunk_encoding_v2( - compressor=compressors, filters=filters, dtype=np.dtype(dtype) + compressor=compressors, filters=filters, dtype=dtype_parsed ) return ArrayV2Metadata( shape=shape_parsed, - dtype=np.dtype(dtype), + dtype=dtype_parsed, chunks=chunk_shape_parsed, order=order_parsed, dimension_separator=chunk_key_encoding_parsed.separator, @@ -379,7 +379,7 @@ def meta_from_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any = 0, order: MemoryOrder | None = None, zarr_format: ZarrFormat = 3, attributes: dict[str, JSON] | None = None, 
diff --git a/tests/test_array.py b/tests/test_array.py index 959cf02055..d54001b54e 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -22,8 +22,6 @@ BytesCodec, GzipCodec, TransposeCodec, - VLenBytesCodec, - VLenUTF8Codec, ZstdCodec, ) from zarr.core._info import ArrayInfo @@ -43,6 +41,7 @@ from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv +from zarr.core.metadata.dtype import get_data_type_from_numpy from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError @@ -451,24 +450,7 @@ def test_vlen_errors() -> None: ValueError, match="For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `BytesCodec`.", ): - Array.create(MemoryStore(), shape=5, chunks=5, dtype=" None: - """ - Test that the default ``filters`` and ``compressors`` are removed when ``create_array`` is invoked. - """ - - arr = await create_array( - store=store, - dtype=dtype, - shape=(10,), - zarr_format=zarr_format, - compressors=empty_value, - filters=empty_value, - ) - # Test metadata explicitly - if zarr_format == 2: - assert arr.metadata.zarr_format == 2 # guard for mypy - # v2 spec requires that filters be either a collection with at least one filter, or None - assert arr.metadata.filters is None - # Compressor is a single element in v2 metadata; the absence of a compressor is encoded - # as None - assert arr.metadata.compressor is None - - assert arr.filters == () - assert arr.compressors == () - else: - assert arr.metadata.zarr_format == 3 # guard for mypy - if dtype == "str": - assert arr.metadata.codecs == (VLenUTF8Codec(),) - assert arr.serializer == VLenUTF8Codec() - else: - assert arr.metadata.codecs == (BytesCodec(),) - assert arr.serializer == BytesCodec() - @staticmethod @pytest.mark.parametrize("dtype", ["uint8", "float32", "str", "U3", "S4", "V1"]) @pytest.mark.parametrize( @@ -1131,28 
+1075,27 @@ async def test_v2_chunk_encoding( assert arr.filters == filters_expected @staticmethod - @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) + @pytest.mark.parametrize("dtype_str", ["uint8", "float32", "str"]) async def test_default_filters_compressors( - store: MemoryStore, dtype: str, zarr_format: ZarrFormat + store: MemoryStore, dtype_str: str, zarr_format: ZarrFormat ) -> None: """ Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with ``filters`` and ``compressors`` unspecified. """ + zdtype = get_data_type_from_numpy(dtype_str) arr = await create_array( store=store, - dtype=dtype, + dtype=dtype_str, shape=(10,), zarr_format=zarr_format, ) if zarr_format == 3: expected_filters, expected_serializer, expected_compressors = ( - _get_default_chunk_encoding_v3(np_dtype=np.dtype(dtype)) + _get_default_chunk_encoding_v3(dtype=zdtype) ) elif zarr_format == 2: - default_filters, default_compressors = _get_default_chunk_encoding_v2( - dtype=np.dtype(dtype) - ) + default_filters, default_compressors = _get_default_chunk_encoding_v2(dtype=zdtype) if default_filters is None: expected_filters = () else: diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py index f73b5e1969..a6c01153ff 100644 --- a/tests/test_codecs/test_vlen.py +++ b/tests/test_codecs/test_vlen.py @@ -8,9 +8,9 @@ from zarr.abc.codec import Codec from zarr.abc.store import Store from zarr.codecs import ZstdCodec +from zarr.core.metadata.dtype import get_data_type_from_numpy from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING -from zarr.registry import get_data_type_from_numpy from zarr.storage import StorePath numpy_str_dtypes: list[type | str | None] = [None, str, "str", np.dtypes.StrDType, "S", "U"] @@ -46,9 +46,11 @@ def test_vlen_string( # should also work if input array is an object array, provided we explicitly specified # a stringlike dtype when creating 
the Array if as_object_array: - data = data.astype("O") + data_obj = data.astype("O") - a[:, :] = data + a[:, :] = data_obj + else: + a[:, :] = data assert np.array_equal(data, a[:, :]) assert a.metadata.data_type == get_data_type_from_numpy(data.dtype) assert a.dtype == data.dtype @@ -59,39 +61,3 @@ def test_vlen_string( assert np.array_equal(data, b[:, :]) assert b.metadata.data_type == get_data_type_from_numpy(data.dtype) assert a.dtype == data.dtype - - -@pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) -@pytest.mark.parametrize("as_object_array", [False, True]) -@pytest.mark.parametrize("compressor", [None, ZstdCodec()]) -def test_vlen_bytes(store: Store, as_object_array: bool, compressor: Codec | None) -> None: - bstrings = [b"hello", b"world", b"this", b"is", b"a", b"test"] - data = np.array(bstrings).reshape((2, 3)) - assert data.dtype == "|S5" - - sp = StorePath(store, path="string") - a = zarr.create_array( - sp, - shape=data.shape, - chunks=data.shape, - dtype=data.dtype, - fill_value=b"", - compressors=compressor, - ) - assert isinstance(a.metadata, ArrayV3Metadata) # needed for mypy - - # should also work if input array is an object array, provided we explicitly specified - # a bytesting-like dtype when creating the Array - if as_object_array: - data = data.astype("O") - a[:, :] = data - assert np.array_equal(data, a[:, :]) - assert a.metadata.data_type == DataType.bytes - assert a.dtype == "O" - - # test round trip - b = Array.open(sp) - assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy - assert np.array_equal(data, b[:, :]) - assert b.metadata.data_type == DataType.bytes - assert a.dtype == "O" diff --git a/tests/test_group.py b/tests/test_group.py index 521819ea0e..378e65d26a 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -993,7 +993,7 @@ async def test_asyncgroup_create_array( assert subnode.dtype == dtype # todo: fix the type annotation of array.metadata.chunk_grid so that we get some 
autocomplete # here. - assert subnode.metadata.chunk_grid.chunk_shape == chunk_shape + assert subnode.chunk_grid.chunk_shape == chunk_shape assert subnode.metadata.zarr_format == zarr_format diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index c1ff2e130a..a81625b7eb 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -20,6 +20,7 @@ from zarr.core.buffer import cpu, default_buffer_prototype from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.metadata import ArrayV3Metadata +from zarr.core.metadata.dtype import get_data_type_from_numpy from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.storage import StorePath @@ -503,7 +504,7 @@ async def test_consolidated_metadata_backwards_compatibility( async def test_consolidated_metadata_v2(self): store = zarr.storage.MemoryStore() g = await AsyncGroup.from_store(store, attributes={"key": "root"}, zarr_format=2) - dtype = "uint8" + dtype = get_data_type_from_numpy("uint8") await g.create_array(name="a", shape=(1,), attributes={"key": "a"}, dtype=dtype) g1 = await g.create_group(name="g1", attributes={"key": "g1"}) await g1.create_group(name="g2", attributes={"key": "g2"}) diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index 4600a977d4..2637224f93 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -3,7 +3,6 @@ import json from typing import TYPE_CHECKING, Literal -import numpy as np import pytest import zarr.api.asynchronous @@ -12,6 +11,7 @@ from zarr.core.buffer.core import default_buffer_prototype from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.metadata import ArrayV2Metadata +from zarr.core.metadata.dtype import Float32, Float64, Int16 from zarr.core.metadata.v2 import parse_zarr_format if TYPE_CHECKING: @@ -219,7 +219,7 @@ async def test_read_consolidated_metadata( fill_value=0, chunks=(730,), 
attributes={"_ARRAY_DIMENSIONS": ["time"], "dataset": "NMC Reanalysis"}, - dtype=np.dtype("int16"), + dtype=Int16(), order="C", filters=None, dimension_separator=".", @@ -236,7 +236,7 @@ async def test_read_consolidated_metadata( "standard_name": "time", "units": "hours since 1800-01-01", }, - dtype=np.dtype("float32"), + dtype=Float32(), order="C", filters=None, dimension_separator=".", @@ -254,7 +254,7 @@ async def test_read_consolidated_metadata( attributes={ "calendar": "standard", }, - dtype=np.dtype("float32"), + dtype=Float32(), order="C", filters=None, dimension_separator=".", @@ -295,7 +295,7 @@ def test_from_dict_extra_fields() -> None: expected = ArrayV2Metadata( attributes={"key": "value"}, shape=(8,), - dtype="float64", + dtype=Float64(), chunks=(8,), fill_value=0.0, order="C", diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 54e077f1a6..37d8704b50 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -12,7 +12,7 @@ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.group import GroupMetadata, parse_node_type -from zarr.core.metadata.dtype import complex_from_json +from zarr.core.metadata.dtype import complex_from_json, get_data_type_from_numpy from zarr.core.metadata.v3 import ( ArrayV3Metadata, parse_dimension_names, @@ -20,7 +20,6 @@ ) from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING from zarr.errors import MetadataValidationError -from zarr.registry import get_data_type_from_numpy if TYPE_CHECKING: from collections.abc import Sequence @@ -56,9 +55,9 @@ complex_dtypes = ("complex64", "complex128") flexible_dtypes = ("str", "bytes", "void") if _NUMPY_SUPPORTS_VLEN_STRING: - vlen_string_dtypes = ("T", "O") + vlen_string_dtypes = ("T",) else: - vlen_string_dtypes = "O" + vlen_string_dtypes = ("O",) dtypes = ( *bool_dtypes, @@ -182,7 +181,7 @@ def 
test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str) @pytest.mark.parametrize("chunk_grid", ["regular"]) @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) -@pytest.mark.parametrize("codecs", [[BytesCodec()]]) +@pytest.mark.parametrize("codecs", [[BytesCodec(endian=None)]]) @pytest.mark.parametrize("fill_value", [0, 1]) @pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"]) @pytest.mark.parametrize("dimension_separator", [".", "/", None]) diff --git a/tests/test_v2.py b/tests/test_v2.py index 0a4487cfcc..c5ed39472f 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -85,14 +85,14 @@ def test_codec_pipeline() -> None: @pytest.mark.parametrize( - ("dtype", "expected_dtype", "fill_value", "fill_value_encoding"), + ("dtype", "expected_dtype", "fill_value", "fill_value_json"), [ ("|S", "|S0", b"X", "WA=="), ("|V", "|V0", b"X", "WA=="), ("|V10", "|V10", b"X", "WAAAAAAAAAAAAA=="), ], ) -async def test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_encoding) -> None: +async def test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_json) -> None: with config.set( { "array.v2_default_filters.bytes": [{"id": "vlen-bytes"}], @@ -113,7 +113,7 @@ async def test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_en "chunks": [3], "compressor": None, "dtype": expected_dtype, - "fill_value": fill_value_encoding, + "fill_value": fill_value_json, "filters": [{"id": "vlen-bytes"}] if dtype == "|S" else None, "order": "C", "shape": [3], @@ -127,37 +127,30 @@ async def test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_en np.testing.assert_equal(data, expected) -@pytest.mark.parametrize("dtype_value", [["|S", b"Y"], ["|U", "Y"], ["O", b"Y"]]) -def test_v2_encode_decode_with_data(dtype_value): - dtype, value = dtype_value - with config.set( - { - "array.v2_default_filters": { - "string": [{"id": "vlen-utf8"}], - "bytes": [{"id": "vlen-bytes"}], - }, - } - ): - expected = 
np.full((3,), value, dtype=dtype) - a = zarr.create( - shape=(3,), - zarr_format=2, - dtype=dtype, - ) - a[:] = expected - data = a[:] - np.testing.assert_equal(data, expected) +@pytest.mark.parametrize(("dtype", "value"), [("|S1", b"Y"), ("|U1", "Y"), ("O", "Y")]) +def test_v2_encode_decode_with_data(dtype, value): + dtype, value = dtype, value + expected = np.full((3,), value, dtype=dtype) + a = zarr.create( + shape=(3,), + zarr_format=2, + dtype=dtype, + ) + a[:] = expected + data = a[:] + np.testing.assert_equal(data, expected) @pytest.mark.parametrize("dtype", [str, "str"]) async def test_create_dtype_str(dtype: Any) -> None: + data = ["a", "bb", "ccc"] arr = zarr.create(shape=3, dtype=dtype, zarr_format=2) assert arr.dtype.kind == "O" assert arr.metadata.to_dict()["dtype"] == "|O" - assert arr.metadata.filters == (numcodecs.vlen.VLenBytes(),) - arr[:] = [b"a", b"bb", b"ccc"] + assert arr.metadata.filters == (numcodecs.vlen.VLenUTF8(),) + arr[:] = data result = arr[:] - np.testing.assert_array_equal(result, np.array([b"a", b"bb", b"ccc"], dtype="object")) + np.testing.assert_array_equal(result, np.array(data, dtype="object")) @pytest.mark.parametrize("filters", [[], [numcodecs.Delta(dtype=" None: - with config.set( - { - "array.v2_default_compressor": { - "numeric": {"id": "zstd", "level": "0"}, - "string": {"id": "zstd", "level": "0"}, - "bytes": {"id": "zstd", "level": "0"}, - }, - "array.v2_default_filters": { - "numeric": [], - "string": [{"id": "vlen-utf8"}], - "bytes": [{"id": "vlen-bytes"}], - }, - } - ): - dtype, expected_compressor, expected_filter = dtype_expected - arr = zarr.create(shape=(3,), path="foo", store={}, zarr_format=2, dtype=dtype) - assert arr.metadata.compressor.codec_id == expected_compressor - if expected_filter is not None: - assert arr.metadata.filters[0].codec_id == expected_filter - - @pytest.mark.parametrize("fill_value", [None, (b"", 0, 0.0)], ids=["no_fill", "fill"]) def test_structured_dtype_roundtrip(fill_value, tmp_path) -> 
None: a = np.array( From c1a85663f7f4a939a84188d81d025f0561cf4a73 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 4 Mar 2025 23:08:15 +0100 Subject: [PATCH 016/130] dtype-specific tests --- src/zarr/core/metadata/dtype.py | 161 ++++++++++++++++-------- tests/test_metadata/test_dtype.py | 203 ++++++++++++++++++++++++++++++ 2 files changed, 312 insertions(+), 52 deletions(-) create mode 100644 tests/test_metadata/test_dtype.py diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index a573794730..590ab7df67 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -172,7 +172,7 @@ def structured_scalar_to_json(data: bytes, zarr_format: ZarrFormat) -> str: raise NotImplementedError(f"Invalid zarr format: {zarr_format}. Expected 2.") -def structured_scalar_from_json(data: JSON, zarr_format: ZarrFormat) -> bytes: +def structured_scalar_from_json(data: str, zarr_format: ZarrFormat) -> bytes: if zarr_format == 2: return base64.b64decode(data.encode("ascii")) raise NotImplementedError(f"Invalid zarr format: {zarr_format}. Expected 2.") @@ -202,11 +202,13 @@ def float_from_json(data: JSONFloat, zarr_format: ZarrFormat) -> float: return float_from_json_v3(data) -def complex_from_json_v2(data: JSONFloat, dtype: Any) -> np.complexfloating: - return dtype.type(data) +def complex_from_json_v2(data: JSONFloat, dtype: Any) -> np.complexfloating[Any, Any]: + return dtype.type(complex(*data)) -def complex_from_json_v3(data: tuple[JSONFloat, JSONFloat], dtype: Any) -> np.complexfloating: +def complex_from_json_v3( + data: tuple[JSONFloat, JSONFloat], dtype: Any +) -> np.complexfloating[Any, Any]: return dtype.type(complex(*data)) @@ -223,6 +225,14 @@ def complex_from_json( raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") +def datetime_to_json(data: np.datetime64[Any]) -> int: + return data.view("int").item() + + +def datetime_from_json(data: int, unit: DateUnit | TimeUnit) -> np.datetime64[Any]: + return np.int64(data).view(f"datetime64[{unit}]") + + TDType = TypeVar("TDType", bound=np.dtype[Any]) TScalar = TypeVar("TScalar", bound=np.generic | str) @@ -231,8 +241,6 @@ def complex_from_json( class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): name: ClassVar[str] dtype_cls: ClassVar[type[TDType]] # this class will create a numpy dtype - kind: ClassVar[DataTypeFlavor] - default_value: ClassVar[TScalar] endianness: Endianness | None = "native" def __init_subclass__(cls) -> None: @@ -248,6 +256,9 @@ def to_dict(self) -> dict[str, JSON]: def cast_value(self: Self, value: object) -> TScalar: return cast(np.generic, self.unwrap().type(value)) + @abstractmethod + def default_value(self) -> TScalar: ... + @classmethod def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[TDType]: """ @@ -291,8 +302,9 @@ def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScal @dataclass(frozen=True, kw_only=True) class Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): name = "bool" - kind = "boolean" - default_value = np.False_ + + def default_value(self) -> np.bool_: + return np.False_ @classmethod def _wrap_unsafe(cls, dtype: np.dtypes.BoolDType) -> Self: @@ -308,7 +320,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: class IntWrapperBase(DTypeWrapper[TDType, TScalar]): - kind = "numeric" + def default_value(self) -> TScalar: + return self.unwrap().type(0) @classmethod def _wrap_unsafe(cls, dtype: TDType) -> Self: @@ -326,53 +339,46 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: @dataclass(frozen=True, kw_only=True) class Int8(IntWrapperBase[np.dtypes.Int8DType, np.int8]): name = "int8" - default_value = np.int8(0) @dataclass(frozen=True, kw_only=True) class 
UInt8(IntWrapperBase[np.dtypes.UInt8DType, np.uint8]): name = "uint8" - default_value = np.uint8(0) @dataclass(frozen=True, kw_only=True) class Int16(IntWrapperBase[np.dtypes.Int16DType, np.int16]): name = "int16" - default_value = np.int16(0) @dataclass(frozen=True, kw_only=True) class UInt16(IntWrapperBase[np.dtypes.UInt16DType, np.uint16]): name = "uint16" - default_value = np.uint16(0) @dataclass(frozen=True, kw_only=True) class Int32(IntWrapperBase[np.dtypes.Int32DType, np.int32]): name = "int32" - default_value = np.int32(0) @dataclass(frozen=True, kw_only=True) class UInt32(IntWrapperBase[np.dtypes.UInt32DType, np.uint32]): name = "uint32" - default_value = np.uint32(0) @dataclass(frozen=True, kw_only=True) class Int64(IntWrapperBase[np.dtypes.Int64DType, np.int64]): name = "int64" - default_value = np.int64(0) @dataclass(frozen=True, kw_only=True) class UInt64(IntWrapperBase[np.dtypes.UInt64DType, np.uint64]): name = "uint64" - default_value = np.uint64(0) class FloatWrapperBase(DTypeWrapper[TDType, TScalar]): - kind = "numeric" + def default_value(self) -> TScalar: + return self.unwrap().type(0.0) @classmethod def _wrap_unsafe(cls, dtype: TDType) -> Self: @@ -390,26 +396,24 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: @dataclass(frozen=True, kw_only=True) class Float16(FloatWrapperBase[np.dtypes.Float16DType, np.float16]): name = "float16" - default_value = np.float16(0) @dataclass(frozen=True, kw_only=True) class Float32(FloatWrapperBase[np.dtypes.Float32DType, np.float32]): name = "float32" - default_value = np.float32(0) @dataclass(frozen=True, kw_only=True) class Float64(FloatWrapperBase[np.dtypes.Float64DType, np.float64]): name = "float64" - default_value = np.float64(0) @dataclass(frozen=True, kw_only=True) class Complex64(DTypeWrapper[np.dtypes.Complex64DType, np.complex64]): name = "complex64" - kind = "numeric" - default_value = np.complex64(0) + + def default_value(self) -> np.complex64: + return 
np.complex64(0.0) @classmethod def _wrap_unsafe(cls, dtype: np.dtypes.Complex64DType) -> Self: @@ -429,8 +433,9 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex6 @dataclass(frozen=True, kw_only=True) class Complex128(DTypeWrapper[np.dtypes.Complex128DType, np.complex128]): name = "complex128" - kind = "numeric" - default_value = np.complex128(0) + + def default_value(self) -> np.complex128: + return np.complex128(0.0) @classmethod def _wrap_unsafe(cls, dtype: np.dtypes.Complex128DType) -> Self: @@ -464,10 +469,11 @@ def unwrap(self) -> TDType: @dataclass(frozen=True, kw_only=True) class StaticByteString(FlexibleWrapperBase[np.dtypes.BytesDType, np.bytes_]): name = "numpy/static_byte_string" - kind = "string" - default_value = np.bytes_(0) item_size_bits = 8 + def default_value(self) -> np.bytes_: + return np.bytes_(b"") + def to_dict(self) -> dict[str, JSON]: return {"name": self.name, "configuration": {"length": self.length}} @@ -476,17 +482,18 @@ def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: if check_json_str(data): - return self.unwrap().type(data.encode("ascii")) + return self.unwrap().type(base64.standard_b64decode(data.encode("ascii"))) raise TypeError(f"Invalid type: {data}. 
Expected a string.") @dataclass(frozen=True, kw_only=True) class StaticRawBytes(FlexibleWrapperBase[np.dtypes.VoidDType, np.void]): name = "r*" - kind = "bytes" - default_value = np.void(b"") item_size_bits = 8 + def default_value(self) -> np.void: + return np.void(b"") + def to_dict(self) -> dict[str, JSON]: return {"name": f"r{self.length * self.item_size_bits}"} @@ -496,21 +503,22 @@ def unwrap(self) -> np.dtypes.VoidDType: endianness_code = endianness_to_numpy_str(self.endianness) return np.dtype(f"{endianness_code}V{self.length}") - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> tuple[int, ...]: + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return base64.standard_b64encode(data).decode("ascii") def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: # todo: check that this is well-formed - return self.unwrap().type(bytes(data)) + return self.unwrap().type(base64.standard_b64decode(data)) @dataclass(frozen=True, kw_only=True) class StaticUnicodeString(FlexibleWrapperBase[np.dtypes.StrDType, np.str_]): name = "numpy/static_unicode_string" - kind = "string" - default_value = np.str_("") item_size_bits = 32 # UCS4 is 32 bits per code point + def default_value(self) -> np.str_: + return np.str_("") + def to_dict(self) -> dict[str, JSON]: return {"name": self.name, "configuration": {"length": self.length}} @@ -528,8 +536,9 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: @dataclass(frozen=True, kw_only=True) class VariableLengthString(DTypeWrapper[np.dtypes.StringDType, str]): name = "numpy/vlen_string" - kind = "string" - default_value = "" + + def default_value(self) -> str: + return "" @classmethod def _wrap_unsafe(cls, dtype: np.dtypes.StringDType) -> Self: @@ -555,10 +564,11 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: @dataclass(frozen=True, kw_only=True) class VariableLengthString(DTypeWrapper[np.dtypes.ObjectDType, 
str]): name = "numpy/vlen_string" - kind = "string" - default_value = np.object_("") endianness: Endianness = field(default=None) + def default_value(self) -> str: + return "" + def __post_init__(self) -> None: if self.endianness is not None: raise ValueError("VariableLengthString does not support endianness.") @@ -570,24 +580,57 @@ def to_dict(self) -> dict[str, JSON]: def _wrap_unsafe(cls, dtype: np.dtypes.ObjectDType) -> Self: return cls() - def unwrap(self) -> np.dtypes.ObjectDType: - return super().unwrap() - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return str(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + """ + String literals pass through + """ if not check_json_str(data): raise TypeError(f"Invalid type: {data}. Expected a string.") - return self.unwrap().type(data) + return data + + +DateUnit = Literal["Y", "M", "W", "D"] +TimeUnit = Literal["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] @dataclass(frozen=True, kw_only=True) -class StructuredDtype(DTypeWrapper[np.dtypes.VoidDType, np.void]): +class DateTime64(DTypeWrapper[np.dtypes.DateTime64DType, np.datetime64]): + name = "numpy/datetime64" + unit: DateUnit | TimeUnit + + def default_value(self) -> np.datetime64: + return np.datetime64("NaT") + + @classmethod + def _wrap_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: + unit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] + return cls(unit=unit) + + def unwrap(self) -> np.dtypes.DateTime64DType: + return np.dtype(f"datetime64[{self.unit}]").newbyteorder( + endianness_to_numpy_str(self.endianness) + ) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: + if check_json_int(data): + return datetime_from_json(data, self.unit) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + + def to_json_value(self, data: np.datetime64, *, zarr_format: ZarrFormat) -> int: + return datetime_to_json(data) + + +@dataclass(frozen=True, kw_only=True) +class Structured(DTypeWrapper[np.dtypes.VoidDType, np.void]): name = "numpy/struct" - kind = "struct" fields: tuple[tuple[str, DTypeWrapper[Any, Any], int], ...] + def default_value(self) -> np.void: + return np.array([0], dtype=self.unwrap())[0] + @classmethod def check_dtype(cls, dtype: np.dtypes.DTypeLike) -> TypeGuard[np.dtypes.VoidDType]: """ @@ -608,6 +651,9 @@ def _wrap_unsafe(cls, dtype: np.dtypes.VoidDType) -> Self: return cls(fields=tuple(fields)) + def unwrap(self) -> np.dtypes.VoidDType: + return np.dtype([(key, dtype.unwrap()) for (key, dtype, _) in self.fields]) + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return structured_scalar_to_json(data.tobytes(), zarr_format) @@ -629,7 +675,10 @@ def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper: np_dtype = np.dtype(dtype) data_type_registry.lazy_load() for val in data_type_registry.contents.values(): - return val.wrap(np_dtype) + try: + return val.wrap(np_dtype) + except TypeError: + pass raise ValueError( f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(data_type_registry.contents)}." 
) @@ -689,11 +738,11 @@ def get(self, key: str) -> type[DTypeWrapper[Any, Any]]: return self.contents[key] def match_dtype(self, dtype: npt.DTypeLike) -> DTypeWrapper[Any, Any]: - data_type_registry.lazy_load() - for val in data_type_registry.contents.values(): + self.lazy_load() + for val in self.contents.values(): try: - return val._wrap_unsafe(dtype) - except ValueError: + return val.wrap(dtype) + except TypeError: pass raise ValueError(f"No data type wrapper found that matches {dtype}") @@ -708,7 +757,15 @@ def register_data_type(cls: type[DTypeWrapper[Any, Any]]) -> None: FLOAT_DTYPE = Float16 | Float32 | Float64 COMPLEX_DTYPE = Complex64 | Complex128 STRING_DTYPE = StaticUnicodeString | VariableLengthString | StaticByteString -for dtype in get_args( - Bool | INTEGER_DTYPE | FLOAT_DTYPE | COMPLEX_DTYPE | STRING_DTYPE | StaticRawBytes -): +DTYPE = ( + Bool + | INTEGER_DTYPE + | FLOAT_DTYPE + | COMPLEX_DTYPE + | STRING_DTYPE + | StaticRawBytes + | Structured + | DateTime64 +) +for dtype in get_args(DTYPE): register_data_type(dtype) diff --git a/tests/test_metadata/test_dtype.py b/tests/test_metadata/test_dtype.py new file mode 100644 index 0000000000..a3f29a34f5 --- /dev/null +++ b/tests/test_metadata/test_dtype.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pytest + +from zarr.core.metadata.dtype import ( + Bool, + Complex64, + Complex128, + DataTypeRegistry, + DateTime64, + DTypeWrapper, + Float16, + Float32, + Float64, + Int8, + Int16, + Int32, + Int64, + StaticByteString, + StaticRawBytes, + StaticUnicodeString, + Structured, + UInt8, + UInt16, + UInt32, + UInt64, + VariableLengthString, +) + +_NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") +if _NUMPY_SUPPORTS_VLEN_STRING: + VLEN_STRING_DTYPE = np.dtypes.StringDType() + VLEN_STRING_CODE = "T" +else: + VLEN_STRING_DTYPE = np.dtypes.ObjectDType() + VLEN_STRING_CODE = "O" + + +@pytest.mark.parametrize( + ("wrapper_cls", 
"np_dtype"), + [ + (Bool, "bool"), + (Int8, "int8"), + (Int16, "int16"), + (Int32, "int32"), + (Int64, "int64"), + (UInt8, "uint8"), + (UInt16, "uint16"), + (UInt32, "uint32"), + (UInt64, "uint64"), + (Float32, "float32"), + (Float64, "float64"), + (Complex64, "complex64"), + (Complex128, "complex128"), + (StaticUnicodeString, "U"), + (StaticByteString, "S"), + (StaticRawBytes, "V"), + (VariableLengthString, VLEN_STRING_CODE), + (Structured, np.dtype([("a", np.float64), ("b", np.int8)])), + (DateTime64, "datetime64[s]"), + ], +) +def test_wrap(wrapper_cls: type[DTypeWrapper[Any, Any]], np_dtype: np.dtype | str) -> None: + """ + Test that the wrapper class has the correct dtype class bound to the dtype_cls variable + Test that the ``wrap`` method produces an instance of the wrapper class + Test that the ``unwrap`` method returns the original dtype + """ + dt = np.dtype(np_dtype) + assert wrapper_cls.dtype_cls is type(dt) + wrapped = wrapper_cls.wrap(dt) + + with pytest.raises(TypeError, match="Invalid dtype"): + wrapper_cls.wrap("not a dtype") + + assert isinstance(wrapped, wrapper_cls) + assert wrapped.unwrap() == dt + + +def test_registry_match() -> None: + """ + Test that registering a dtype in a data type registry works + Test that match_dtype resolves a numpy dtype into the stored dtype + Test that match_dtype raises an error if the dtype is not registered + """ + local_registry = DataTypeRegistry() + local_registry.register(Bool) + assert isinstance(local_registry.match_dtype(np.dtype("bool")), Bool) + outside_dtype = "int8" + with pytest.raises( + ValueError, match=f"No data type wrapper found that matches {outside_dtype}" + ): + local_registry.match_dtype(np.dtype(outside_dtype)) + + +# start writing new tests here + + +@pytest.mark.parametrize( + ("wrapper", "expected_default"), + [ + (Bool(), np.False_), + (Int8(), np.int8(0)), + (UInt8(), np.uint8(0)), + (Int16(), np.int16(0)), + (UInt16(), np.uint16(0)), + (Int32(), np.int32(0)), + (UInt32(), 
np.uint32(0)), + (Int64(), np.int64(0)), + (UInt64(), np.uint64(0)), + (Float16(), np.float16(0)), + (Float32(), np.float32(0)), + (Float64(), np.float64(0)), + (Complex64(), np.complex64(0)), + (Complex128(), np.complex128(0)), + (StaticByteString(length=3), np.bytes_(b"")), + (StaticRawBytes(length=3), np.void(b"")), + (StaticUnicodeString(length=3), np.str_("")), + ( + Structured(fields=(("a", Float64(), 0), ("b", Int8(), 8))), + np.array([0], dtype=[("a", np.float64), ("b", np.int8)])[0], + ), + (VariableLengthString(), ""), + (DateTime64(unit="s"), np.datetime64("NaT")), + ], +) +def test_default_value(wrapper: type[DTypeWrapper[Any, Any]], expected_default: Any) -> None: + """ + Test that the default_value method is correctly set for each dtype wrapper. + """ + if isinstance(wrapper, DateTime64): + assert np.isnan(wrapper.default_value()) + else: + assert wrapper.default_value() == expected_default + + +@pytest.mark.parametrize( + ("wrapper", "input_value", "expected_json"), + [ + (Bool(), np.bool_(True), True), + (Int8(), np.int8(42), 42), + (UInt8(), np.uint8(42), 42), + (Int16(), np.int16(42), 42), + (UInt16(), np.uint16(42), 42), + (Int32(), np.int32(42), 42), + (UInt32(), np.uint32(42), 42), + (Int64(), np.int64(42), 42), + (UInt64(), np.uint64(42), 42), + (Float16(), np.float16(42.0), 42.0), + (Float32(), np.float32(42.0), 42.0), + (Float64(), np.float64(42.0), 42.0), + (Complex64(), np.complex64(42.0 + 1.0j), (42.0, 1.0)), + (Complex128(), np.complex128(42.0 + 1.0j), (42.0, 1.0)), + (StaticByteString(length=4), np.bytes_(b"test"), "dGVzdA=="), + (StaticRawBytes(length=4), np.void(b"test"), "dGVzdA=="), + (StaticUnicodeString(length=4), np.str_("test"), "test"), + (VariableLengthString(), "test", "test"), + (DateTime64(unit="s"), np.datetime64("2021-01-01T00:00:00", "s"), 1609459200), + ], +) +def test_to_json_value_v2( + wrapper: type[DTypeWrapper[Any, Any]], input_value: Any, expected_json: Any +) -> None: + """ + Test the to_json_value method for 
each dtype wrapper for zarr v2 + """ + assert wrapper.to_json_value(input_value, zarr_format=2) == expected_json + + +@pytest.mark.parametrize( + ("wrapper", "json_value", "expected_value"), + [ + (Bool(), True, np.bool_(True)), + (Int8(), 42, np.int8(42)), + (UInt8(), 42, np.uint8(42)), + (Int16(), 42, np.int16(42)), + (UInt16(), 42, np.uint16(42)), + (Int32(), 42, np.int32(42)), + (UInt32(), 42, np.uint32(42)), + (Int64(), 42, np.int64(42)), + (UInt64(), 42, np.uint64(42)), + (Float16(), 42.0, np.float16(42.0)), + (Float32(), 42.0, np.float32(42.0)), + (Float64(), 42.0, np.float64(42.0)), + (Complex64(), (42.0, 1.0), np.complex64(42.0 + 1.0j)), + (Complex128(), (42.0, 1.0), np.complex128(42.0 + 1.0j)), + (StaticByteString(length=4), "dGVzdA==", np.bytes_(b"test")), + (StaticRawBytes(length=4), "dGVzdA==", np.void(b"test")), + (StaticUnicodeString(length=4), "test", np.str_("test")), + (VariableLengthString(), "test", "test"), + (DateTime64(unit="s"), 1609459200, np.datetime64("2021-01-01T00:00:00", "s")), + ], +) +def test_from_json_value( + wrapper: type[DTypeWrapper[Any, Any]], json_value: Any, expected_value: Any +) -> None: + """ + Test the from_json_value method for each dtype wrapper. 
+ """ + assert wrapper.from_json_value(json_value, zarr_format=2) == expected_value From 2868994b07a610121d707742ee025e4ba43f78e0 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 5 Mar 2025 16:57:23 +0100 Subject: [PATCH 017/130] more tests, fix void type default value logic --- src/zarr/core/array.py | 11 +-- src/zarr/core/buffer/core.py | 4 +- src/zarr/core/codec_pipeline.py | 2 +- src/zarr/core/metadata/dtype.py | 87 +++++++++++++++------- src/zarr/core/metadata/v2.py | 64 ++++------------ tests/test_array.py | 27 +------ tests/test_metadata/test_dtype.py | 120 +++++++++++++++++++++++------- tests/test_metadata/test_v3.py | 18 ++--- tests/test_v2.py | 41 +--------- 9 files changed, 185 insertions(+), 189 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index abd862f023..7718aa505f 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -103,7 +103,8 @@ ) from zarr.core.metadata.dtype import ( DTypeWrapper, - StaticByteString, + FixedLengthAsciiString, + FixedLengthUnicodeString, VariableLengthString, get_data_type_from_numpy, ) @@ -710,7 +711,7 @@ def _create_metadata_v3( if fill_value is None: # v3 spec will not allow a null fill value - fill_value_parsed = dtype.default_value + fill_value_parsed = dtype.default_value() else: fill_value_parsed = fill_value @@ -4237,7 +4238,7 @@ def _get_default_chunk_encoding_v3( if isinstance(dtype, VariableLengthString): serializer = VLenUTF8Codec() - elif isinstance(dtype, StaticByteString): + elif isinstance(dtype, FixedLengthAsciiString): serializer = VLenBytesCodec() else: if dtype.unwrap().itemsize == 1: @@ -4257,9 +4258,9 @@ def _get_default_chunk_encoding_v2( from numcodecs import VLenUTF8 as numcodecs_VLenUTF8 from numcodecs import Zstd as numcodecs_zstd - if isinstance(dtype, VariableLengthString): + if isinstance(dtype, VariableLengthString | FixedLengthUnicodeString): filters = (numcodecs_VLenUTF8(),) - elif isinstance(dtype, StaticByteString): + elif 
isinstance(dtype, FixedLengthAsciiString): filters = (numcodecs_VLenBytes(),) else: filters = None diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index ccab103e0f..23ac5d3a69 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -472,7 +472,9 @@ def all_equal(self, other: Any, equal_nan: bool = True) -> bool: return np.array_equal( self._data, other, - equal_nan=equal_nan if self._data.dtype.kind not in "USTOV" else False, + equal_nan=equal_nan + if self._data.dtype.kind not in ("U", "S", "T", "O", "V") + else False, ) def fill(self, value: Any) -> None: diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index 315dbb77a9..5ee4f03799 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -63,7 +63,7 @@ def fill_value_or_default(chunk_spec: ArraySpec) -> Any: # validated when decoding the metadata, but we support reading # Zarr V2 data and need to support the case where fill_value # is None. - return chunk_spec.dtype.default_value + return chunk_spec.dtype.default_value() else: return fill_value diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 590ab7df67..17e67fbb05 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -16,6 +16,7 @@ TypeVar, cast, get_args, + get_origin, ) import numpy as np @@ -133,7 +134,7 @@ def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: # v3 can in principle handle distinct NaN values, but numpy does not represent these explicitly - # so we just re-use the v2 routine here + # so we just reuse the v2 routine here return float_to_json_v2(data) @@ -148,11 +149,11 @@ def float_to_json(data: float | np.floating[Any], zarr_format: ZarrFormat) -> JS raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") -def complex_to_json_v2(data: complex | np.complexfloating[Any]) -> tuple[JSONFloat, JSONFloat]: +def complex_to_json_v2(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: return float_to_json_v2(data.real), float_to_json_v2(data.imag) -def complex_to_json_v3(data: complex | np.complexfloating[Any]) -> tuple[JSONFloat, JSONFloat]: +def complex_to_json_v3(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: return float_to_json_v3(data.real), float_to_json_v3(data.imag) @@ -226,15 +227,16 @@ def complex_from_json( def datetime_to_json(data: np.datetime64[Any]) -> int: - return data.view("int").item() + return data.view(np.int64).item() def datetime_from_json(data: int, unit: DateUnit | TimeUnit) -> np.datetime64[Any]: return np.int64(data).view(f"datetime64[{unit}]") +TScalar = TypeVar("TScalar", bound=np.generic | str, covariant=True) +# TODO: figure out an interface or protocol that non-numpy dtypes can TDType = TypeVar("TDType", bound=np.dtype[Any]) -TScalar = TypeVar("TScalar", bound=np.generic | str) @dataclass(frozen=True, kw_only=True) @@ -244,17 +246,27 @@ class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): endianness: Endianness | None = "native" def __init_subclass__(cls) -> None: - # Subclasses will bind the first generic type parameter to an attribute of the class # TODO: wrap this in some *very informative* error handling generic_args = get_args(get_original_bases(cls)[0]) - cls.dtype_cls = generic_args[0] + # the logic here is that if a subclass was created with generic type parameters + # specified explicitly, then we bind that type parameter to the dtype_cls attribute + if len(generic_args) > 0: + cls.dtype_cls = generic_args[0] + else: + # but if the subclass was created without generic type parameters specified explicitly, + # then we check the parent DTypeWrapper classes and retrieve their generic type parameters + for base in cls.__orig_bases__: + if get_origin(base) 
is DTypeWrapper: + generic_args = get_args(base) + cls.dtype_cls = generic_args[0] + break return super().__init_subclass__() def to_dict(self) -> dict[str, JSON]: return {"name": self.name} def cast_value(self: Self, value: object) -> TScalar: - return cast(np.generic, self.unwrap().type(value)) + return cast(TScalar, self.unwrap().type(value)) @abstractmethod def default_value(self) -> TScalar: ... @@ -455,7 +467,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex1 @dataclass(frozen=True, kw_only=True) class FlexibleWrapperBase(DTypeWrapper[TDType, TScalar]): item_size_bits: ClassVar[int] - length: int + length: int = 0 @classmethod def _wrap_unsafe(cls, dtype: TDType) -> Self: @@ -467,7 +479,7 @@ def unwrap(self) -> TDType: @dataclass(frozen=True, kw_only=True) -class StaticByteString(FlexibleWrapperBase[np.dtypes.BytesDType, np.bytes_]): +class FixedLengthAsciiString(FlexibleWrapperBase[np.dtypes.BytesDType, np.bytes_]): name = "numpy/static_byte_string" item_size_bits = 8 @@ -492,11 +504,18 @@ class StaticRawBytes(FlexibleWrapperBase[np.dtypes.VoidDType, np.void]): item_size_bits = 8 def default_value(self) -> np.void: - return np.void(b"") + return self.cast_value(("\x00" * self.length).encode("ascii")) def to_dict(self) -> dict[str, JSON]: return {"name": f"r{self.length * self.item_size_bits}"} + @classmethod + def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[TDType]: + """ + Reject structured dtypes by ensuring that dtype.fields is None + """ + return type(dtype) is cls.dtype_cls and dtype.fields is None + def unwrap(self) -> np.dtypes.VoidDType: # this needs to be overridden because numpy does not allow creating a void type # by invoking np.dtypes.VoidDType directly @@ -512,7 +531,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: @dataclass(frozen=True, kw_only=True) -class StaticUnicodeString(FlexibleWrapperBase[np.dtypes.StrDType, np.str_]): +class 
FixedLengthUnicodeString(FlexibleWrapperBase[np.dtypes.StrDType, np.str_]): name = "numpy/static_unicode_string" item_size_bits = 32 # UCS4 is 32 bits per code point @@ -599,7 +618,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: @dataclass(frozen=True, kw_only=True) class DateTime64(DTypeWrapper[np.dtypes.DateTime64DType, np.datetime64]): name = "numpy/datetime64" - unit: DateUnit | TimeUnit + unit: DateUnit | TimeUnit = "s" def default_value(self) -> np.datetime64: return np.datetime64("NaT") @@ -609,6 +628,9 @@ def _wrap_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: unit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] return cls(unit=unit) + def cast_value(self, value: object) -> np.datetime64: + return self.unwrap().type(value, self.unit) + def unwrap(self) -> np.dtypes.DateTime64DType: return np.dtype(f"datetime64[{self.unit}]").newbyteorder( endianness_to_numpy_str(self.endianness) @@ -651,6 +673,26 @@ def _wrap_unsafe(cls, dtype: np.dtypes.VoidDType) -> Self: return cls(fields=tuple(fields)) + def to_dict(self) -> dict[str, JSON]: + base_dict = super().to_dict() + if base_dict.get("configuration", {}) != {}: + raise ValueError( + "This data type wrapper cannot inherit from a data type wrapper that defines a configuration for its dict serialization" + ) + field_configs = [ + (f_name, f_dtype.to_dict(), f_offset) for f_name, f_dtype, f_offset in self.fields + ] + base_dict["configuration"] = {"fields": field_configs} + return base_dict + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + fields = tuple( + (f_name, get_data_type_from_dict(f_dtype), f_offset) + for f_name, f_dtype, f_offset in data["fields"] + ) + return cls(fields=fields) + def unwrap(self) -> np.dtypes.VoidDType: return np.dtype([(key, dtype.unwrap()) for (key, dtype, _) in self.fields]) @@ -665,7 +707,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: return np.array([as_bytes], 
dtype=dtype.str).view(dtype)[0] -def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper: +def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper[Any, Any]: if dtype in (str, "str"): if _NUMPY_SUPPORTS_VLEN_STRING: np_dtype = np.dtype("T") @@ -674,17 +716,10 @@ def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper: else: np_dtype = np.dtype(dtype) data_type_registry.lazy_load() - for val in data_type_registry.contents.values(): - try: - return val.wrap(np_dtype) - except TypeError: - pass - raise ValueError( - f"numpy dtype '{dtype}' does not have a corresponding Zarr dtype in: {list(data_type_registry.contents)}." - ) + return data_type_registry.match_dtype(np_dtype) -def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeWrapper: +def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeWrapper[Any.Any]: data_type_registry.lazy_load() dtype_name = dtype["name"] dtype_cls = data_type_registry.get(dtype_name) @@ -737,14 +772,14 @@ def register(self: Self, cls: type[DTypeWrapper[Any, Any]]) -> None: def get(self, key: str) -> type[DTypeWrapper[Any, Any]]: return self.contents[key] - def match_dtype(self, dtype: npt.DTypeLike) -> DTypeWrapper[Any, Any]: + def match_dtype(self, dtype: TDType) -> DTypeWrapper[Any, Any]: self.lazy_load() for val in self.contents.values(): try: return val.wrap(dtype) except TypeError: pass - raise ValueError(f"No data type wrapper found that matches {dtype}") + raise ValueError(f"No data type wrapper found that matches dtype '{dtype}'") def register_data_type(cls: type[DTypeWrapper[Any, Any]]) -> None: @@ -756,7 +791,7 @@ def register_data_type(cls: type[DTypeWrapper[Any, Any]]) -> None: INTEGER_DTYPE = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 FLOAT_DTYPE = Float16 | Float32 | Float64 COMPLEX_DTYPE = Complex64 | Complex128 -STRING_DTYPE = StaticUnicodeString | VariableLengthString | StaticByteString +STRING_DTYPE = FixedLengthUnicodeString | VariableLengthString | 
FixedLengthAsciiString DTYPE = ( Bool | INTEGER_DTYPE diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index ebf174eff3..cb09a35bec 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -3,7 +3,6 @@ import base64 import warnings from collections.abc import Iterable -from enum import Enum from typing import TYPE_CHECKING, TypedDict, cast import numcodecs.abc @@ -11,8 +10,7 @@ from zarr.abc.metadata import Metadata from zarr.core.metadata.dtype import ( DTypeWrapper, - StaticByteString, - StaticRawBytes, + Structured, get_data_type_from_numpy, ) @@ -109,49 +107,12 @@ def shards(self) -> ChunkCoords | None: return None def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: - def _json_convert( - o: Any, - ) -> Any: - if isinstance(o, np.dtype): - if o.fields is None: - return o.str - else: - return o.descr - if isinstance(o, numcodecs.abc.Codec): - codec_config = o.get_config() - - # Hotfix for https://github.com/zarr-developers/zarr-python/issues/2647 - if codec_config["id"] == "zstd" and not codec_config.get("checksum", False): - codec_config.pop("checksum", None) - - return codec_config - if np.isscalar(o): - out: Any - if hasattr(o, "dtype") and o.dtype.kind == "M" and hasattr(o, "view"): - # https://github.com/zarr-developers/zarr-python/issues/2119 - # `.item()` on a datetime type might or might not return an - # integer, depending on the value. 
- # Explicitly cast to an int first, and then grab .item() - out = o.view("i8").item() - else: - # convert numpy scalar to python type, and pass - # python types through - out = getattr(o, "item", lambda: o)() - if isinstance(out, complex): - # python complex types are not JSON serializable, so we use the - # serialization defined in the zarr v3 spec - return [out.real, out.imag] - return out - if isinstance(o, Enum): - return o.name - raise TypeError - zarray_dict = self.to_dict() zattrs_dict = zarray_dict.pop("attributes", {}) json_indent = config.get("json_indent") return { ZARRAY_JSON: prototype.buffer.from_bytes( - json.dumps(zarray_dict, default=_json_convert, indent=json_indent).encode() + json.dumps(zarray_dict, indent=json_indent).encode() ), ZATTRS_JSON: prototype.buffer.from_bytes( json.dumps(zattrs_dict, indent=json_indent).encode() @@ -196,11 +157,19 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: def to_dict(self) -> dict[str, JSON]: zarray_dict = super().to_dict() + if isinstance(zarray_dict["compressor"], numcodecs.abc.Codec): + zarray_dict["compressor"] = zarray_dict["compressor"].get_config() + if zarray_dict["filters"] is not None: + raw_filters = zarray_dict["filters"] + new_filters = [] + for f in raw_filters: + if isinstance(f, numcodecs.abc.Codec): + new_filters.append(f.get_config()) + else: + new_filters.append(f) + zarray_dict["filters"] = new_filters - if ( - isinstance(self.dtype, StaticByteString | StaticRawBytes) - and self.fill_value is not None - ): + if self.fill_value is not None: # There's a relationship between self.dtype and self.fill_value # that mypy isn't aware of. The fact that we have S or V dtype here # means we should have a bytes-type fill_value. 
@@ -209,10 +178,7 @@ def to_dict(self) -> dict[str, JSON]: _ = zarray_dict.pop("dtype") dtype_json: JSON - # TODO: Replace this with per-dtype method - # In the case of zarr v2, the simplest i.e., '|VXX' dtype is represented as a string - dtype_descr = self.dtype.unwrap().descr - if self.dtype.unwrap().kind == "V" and dtype_descr[0][0] != "" and len(dtype_descr) != 0: + if isinstance(self.dtype, Structured): dtype_json = tuple(self.dtype.unwrap().descr) else: dtype_json = self.dtype.unwrap().str diff --git a/tests/test_array.py b/tests/test_array.py index d54001b54e..5c58b3d3be 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -428,31 +428,6 @@ async def test_nbytes_stored_async() -> None: assert result == 902 # the size with all chunks filled. -def test_default_fill_values() -> None: - a = zarr.Array.create(MemoryStore(), shape=5, chunk_shape=5, dtype=" None: - with pytest.raises(ValueError, match="At least one ArrayBytesCodec is required."): - Array.create(MemoryStore(), shape=5, chunks=5, dtype=" None: # regression test for https://github.com/zarr-developers/zarr-python/issues/2328 @@ -1061,7 +1036,7 @@ async def test_v2_chunk_encoding( filters=filters, ) filters_expected, compressor_expected = _parse_chunk_encoding_v2( - filters=filters, compressor=compressors, dtype=np.dtype(dtype) + filters=filters, compressor=compressors, dtype=get_data_type_from_numpy(dtype) ) assert arr.metadata.zarr_format == 2 # guard for mypy assert arr.metadata.compressor == compressor_expected diff --git a/tests/test_metadata/test_dtype.py b/tests/test_metadata/test_dtype.py index a3f29a34f5..d0a0243a9f 100644 --- a/tests/test_metadata/test_dtype.py +++ b/tests/test_metadata/test_dtype.py @@ -1,17 +1,20 @@ from __future__ import annotations -from typing import Any +from typing import Any, get_args import numpy as np import pytest from zarr.core.metadata.dtype import ( + DTYPE, Bool, Complex64, Complex128, DataTypeRegistry, DateTime64, DTypeWrapper, + 
FixedLengthAsciiString, + FixedLengthUnicodeString, Float16, Float32, Float64, @@ -19,17 +22,22 @@ Int16, Int32, Int64, - StaticByteString, StaticRawBytes, - StaticUnicodeString, Structured, UInt8, UInt16, UInt32, UInt64, VariableLengthString, + data_type_registry, ) + +@pytest.fixture +def dtype_registry() -> DataTypeRegistry: + return DataTypeRegistry() + + _NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") if _NUMPY_SUPPORTS_VLEN_STRING: VLEN_STRING_DTYPE = np.dtypes.StringDType() @@ -55,8 +63,8 @@ (Float64, "float64"), (Complex64, "complex64"), (Complex128, "complex128"), - (StaticUnicodeString, "U"), - (StaticByteString, "S"), + (FixedLengthUnicodeString, "U"), + (FixedLengthAsciiString, "S"), (StaticRawBytes, "V"), (VariableLengthString, VLEN_STRING_CODE), (Structured, np.dtype([("a", np.float64), ("b", np.int8)])), @@ -80,23 +88,14 @@ def test_wrap(wrapper_cls: type[DTypeWrapper[Any, Any]], np_dtype: np.dtype | st assert wrapped.unwrap() == dt -def test_registry_match() -> None: - """ - Test that registering a dtype in a data type registry works - Test that match_dtype resolves a numpy dtype into the stored dtype - Test that match_dtype raises an error if the dtype is not registered - """ - local_registry = DataTypeRegistry() - local_registry.register(Bool) - assert isinstance(local_registry.match_dtype(np.dtype("bool")), Bool) - outside_dtype = "int8" - with pytest.raises( - ValueError, match=f"No data type wrapper found that matches {outside_dtype}" - ): - local_registry.match_dtype(np.dtype(outside_dtype)) - - -# start writing new tests here +@pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) +def test_dict_serialization(wrapper_cls: DTYPE) -> None: + if issubclass(wrapper_cls, Structured): + instance = wrapper_cls(fields=((("a", Bool(), 0),))) + else: + instance = wrapper_cls() + as_dict = instance.to_dict() + assert wrapper_cls.from_dict(data=as_dict.get("configuration", {})) == instance @pytest.mark.parametrize( @@ -116,9 +115,9 @@ 
def test_registry_match() -> None: (Float64(), np.float64(0)), (Complex64(), np.complex64(0)), (Complex128(), np.complex128(0)), - (StaticByteString(length=3), np.bytes_(b"")), + (FixedLengthAsciiString(length=3), np.bytes_(b"")), (StaticRawBytes(length=3), np.void(b"")), - (StaticUnicodeString(length=3), np.str_("")), + (FixedLengthUnicodeString(length=3), np.str_("")), ( Structured(fields=(("a", Float64(), 0), ("b", Int8(), 8))), np.array([0], dtype=[("a", np.float64), ("b", np.int8)])[0], @@ -154,9 +153,9 @@ def test_default_value(wrapper: type[DTypeWrapper[Any, Any]], expected_default: (Float64(), np.float64(42.0), 42.0), (Complex64(), np.complex64(42.0 + 1.0j), (42.0, 1.0)), (Complex128(), np.complex128(42.0 + 1.0j), (42.0, 1.0)), - (StaticByteString(length=4), np.bytes_(b"test"), "dGVzdA=="), + (FixedLengthAsciiString(length=4), np.bytes_(b"test"), "dGVzdA=="), (StaticRawBytes(length=4), np.void(b"test"), "dGVzdA=="), - (StaticUnicodeString(length=4), np.str_("test"), "test"), + (FixedLengthUnicodeString(length=4), np.str_("test"), "test"), (VariableLengthString(), "test", "test"), (DateTime64(unit="s"), np.datetime64("2021-01-01T00:00:00", "s"), 1609459200), ], @@ -187,9 +186,9 @@ def test_to_json_value_v2( (Float64(), 42.0, np.float64(42.0)), (Complex64(), (42.0, 1.0), np.complex64(42.0 + 1.0j)), (Complex128(), (42.0, 1.0), np.complex128(42.0 + 1.0j)), - (StaticByteString(length=4), "dGVzdA==", np.bytes_(b"test")), + (FixedLengthAsciiString(length=4), "dGVzdA==", np.bytes_(b"test")), (StaticRawBytes(length=4), "dGVzdA==", np.void(b"test")), - (StaticUnicodeString(length=4), "test", np.str_("test")), + (FixedLengthUnicodeString(length=4), "test", np.str_("test")), (VariableLengthString(), "test", "test"), (DateTime64(unit="s"), 1609459200, np.datetime64("2021-01-01T00:00:00", "s")), ], @@ -201,3 +200,68 @@ def test_from_json_value( Test the from_json_value method for each dtype wrapper. 
""" assert wrapper.from_json_value(json_value, zarr_format=2) == expected_value + + +class TestRegistry: + @staticmethod + def test_register(dtype_registry: DataTypeRegistry) -> None: + """ + Test that registering a dtype in a data type registry works. + """ + dtype_registry.register(Bool) + assert dtype_registry.get("bool") == Bool + assert isinstance(dtype_registry.match_dtype(np.dtype("bool")), Bool) + + @staticmethod + def test_override(dtype_registry: DataTypeRegistry) -> None: + """ + Test that registering a new dtype with the same name works (overriding the previous one). + """ + dtype_registry.register(Bool) + + class NewBool(Bool): + def default_value(self) -> np.bool_: + return np.True_ + + dtype_registry.register(NewBool) + assert isinstance(dtype_registry.match_dtype(np.dtype("bool")), NewBool) + + @staticmethod + @pytest.mark.parametrize( + ("wrapper_cls", "dtype_str"), [(Bool, "bool"), (FixedLengthUnicodeString, "|U4")] + ) + def test_match_dtype( + dtype_registry: DataTypeRegistry, wrapper_cls: type[DTypeWrapper[Any, Any]], dtype_str: str + ) -> None: + """ + Test that match_dtype resolves a numpy dtype into an instance of the correspond wrapper for that dtype. + """ + dtype_registry.register(wrapper_cls) + assert isinstance(dtype_registry.match_dtype(np.dtype(dtype_str)), wrapper_cls) + + @staticmethod + def test_unregistered_dtype(dtype_registry: DataTypeRegistry) -> None: + """ + Test that match_dtype raises an error if the dtype is not registered. + """ + outside_dtype = "int8" + with pytest.raises( + ValueError, match=f"No data type wrapper found that matches dtype '{outside_dtype}'" + ): + dtype_registry.match_dtype(np.dtype(outside_dtype)) + + with pytest.raises(KeyError): + dtype_registry.get(outside_dtype) + + @staticmethod + @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) + def test_registered_dtypes(wrapper_cls: DTypeWrapper[Any, Any]) -> None: + """ + Test that the registered dtypes can be retrieved from the registry. 
+ """ + if issubclass(wrapper_cls, Structured): + instance = wrapper_cls(fields=((("a", Bool(), 0),))) + else: + instance = wrapper_cls() + + assert data_type_registry.match_dtype(instance.unwrap()) == instance diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 37d8704b50..ea59496280 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -12,7 +12,7 @@ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.group import GroupMetadata, parse_node_type -from zarr.core.metadata.dtype import complex_from_json, get_data_type_from_numpy +from zarr.core.metadata.dtype import DateTime64, complex_from_json, get_data_type_from_numpy from zarr.core.metadata.v3 import ( ArrayV3Metadata, parse_dimension_names, @@ -266,19 +266,19 @@ def test_json_indent(indent: int): assert d == json.dumps(json.loads(d), indent=indent).encode() -@pytest.mark.xfail(reason="Data type not supported yet") @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897]) @pytest.mark.parametrize("precision", ["ns", "D"]) async def test_datetime_metadata(fill_value: int, precision: str) -> None: + dtype = DateTime64(unit=precision) metadata_dict = { "zarr_format": 3, "node_type": "array", "shape": (1,), "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": f" None: elif fill_value == "-Infinity": assert np.isneginf(m.fill_value) assert d["fill_value"] == "-Infinity" - - -@pytest.mark.parametrize("dtype_str", dtypes) -def test_dtypes(dtype_str: str) -> None: - dt = get_data_type_from_numpy(dtype_str) - np_dtype = dt.unwrap() - assert isinstance(np_dtype, dt.dtype_cls) - assert np_dtype.type(0) == dt.cast_value(0) diff --git a/tests/test_v2.py b/tests/test_v2.py index c5ed39472f..f3dec247b7 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -40,33 +40,6 @@ def test_simple(store: StorePath) -> None: assert np.array_equal(data, 
a[:, :]) -@pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize( - ("dtype", "fill_value"), - [ - ("bool", False), - ("int64", 0), - ("float64", 0.0), - ("|S1", b""), - ("|U1", ""), - ("object", ""), - (str, ""), - ], -) -def test_implicit_fill_value(store: MemoryStore, dtype: str, fill_value: Any) -> None: - arr = zarr.create(store=store, shape=(4,), fill_value=None, zarr_format=2, dtype=dtype) - assert arr.metadata.fill_value is None - assert arr.metadata.to_dict()["fill_value"] is None - result = arr[:] - if dtype is str: - # special case - numpy_dtype = np.dtype(object) - else: - numpy_dtype = np.dtype(dtype) - expected = np.full(arr.shape, fill_value, dtype=numpy_dtype) - np.testing.assert_array_equal(result, expected) - - def test_codec_pipeline() -> None: # https://github.com/zarr-developers/zarr-python/issues/2243 store = MemoryStore() @@ -127,7 +100,7 @@ async def test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_js np.testing.assert_equal(data, expected) -@pytest.mark.parametrize(("dtype", "value"), [("|S1", b"Y"), ("|U1", "Y"), ("O", "Y")]) +@pytest.mark.parametrize(("dtype", "value"), [("|S1", b"Y"), ("|U1", "Y"), (str, "Y")]) def test_v2_encode_decode_with_data(dtype, value): dtype, value = dtype, value expected = np.full((3,), value, dtype=dtype) @@ -141,18 +114,6 @@ def test_v2_encode_decode_with_data(dtype, value): np.testing.assert_equal(data, expected) -@pytest.mark.parametrize("dtype", [str, "str"]) -async def test_create_dtype_str(dtype: Any) -> None: - data = ["a", "bb", "ccc"] - arr = zarr.create(shape=3, dtype=dtype, zarr_format=2) - assert arr.dtype.kind == "O" - assert arr.metadata.to_dict()["dtype"] == "|O" - assert arr.metadata.filters == (numcodecs.vlen.VLenUTF8(),) - arr[:] = data - result = arr[:] - np.testing.assert_array_equal(result, np.array(data, dtype="object")) - - @pytest.mark.parametrize("filters", [[], [numcodecs.Delta(dtype=" None: From 
9ab0b1ee8c43e67436fa053444b1fa59d1052ed8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 5 Mar 2025 19:50:54 +0100 Subject: [PATCH 018/130] fix dtype mechanics in bytescodec --- src/zarr/codecs/bytes.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 1da497ea72..cd9e6d89e9 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -71,14 +71,8 @@ async def _decode_single( chunk_spec: ArraySpec, ) -> NDBuffer: assert isinstance(chunk_bytes, Buffer) - if chunk_spec.dtype.unwrap().itemsize > 0: - if self.endian == Endian.little: - prefix = "<" - else: - prefix = ">" - dtype = np.dtype(f"{prefix}{chunk_spec.dtype.unwrap().str[1:]}") - else: - dtype = np.dtype(f"|{chunk_spec.dtype.unwrap().str[1:]}") + + dtype = chunk_spec.dtype.with_endianness(self.endian).unwrap() as_array_like = chunk_bytes.as_array_like() if isinstance(as_array_like, NDArrayLike): From e14279d66868a4d2f78c162612ee1a34db553358 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 7 Mar 2025 23:14:22 +0100 Subject: [PATCH 019/130] remove __post_init__ magic in favor of more explicit declaration --- src/zarr/codecs/bytes.py | 5 ++-- src/zarr/core/metadata/dtype.py | 48 +++++++++++++++++---------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index cd9e6d89e9..9a5a217abf 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -71,8 +71,9 @@ async def _decode_single( chunk_spec: ArraySpec, ) -> NDBuffer: assert isinstance(chunk_bytes, Buffer) - - dtype = chunk_spec.dtype.with_endianness(self.endian).unwrap() + # TODO: remove endianness enum in favor of literal union + endian_str = self.endian.value if self.endian is not None else None + dtype = chunk_spec.dtype.with_endianness(endian_str).unwrap() as_array_like = chunk_bytes.as_array_like() if isinstance(as_array_like, NDArrayLike): diff --git 
a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 17e67fbb05..33aa22b398 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -16,12 +16,10 @@ TypeVar, cast, get_args, - get_origin, ) import numpy as np import numpy.typing as npt -from typing_extensions import get_original_bases from zarr.abc.metadata import Metadata from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING @@ -245,23 +243,6 @@ class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): dtype_cls: ClassVar[type[TDType]] # this class will create a numpy dtype endianness: Endianness | None = "native" - def __init_subclass__(cls) -> None: - # TODO: wrap this in some *very informative* error handling - generic_args = get_args(get_original_bases(cls)[0]) - # the logic here is that if a subclass was created with generic type parameters - # specified explicitly, then we bind that type parameter to the dtype_cls attribute - if len(generic_args) > 0: - cls.dtype_cls = generic_args[0] - else: - # but if the subclass was created without generic type parameters specified explicitly, - # then we check the parent DTypeWrapper classes and retrieve their generic type parameters - for base in cls.__orig_bases__: - if get_origin(base) is DTypeWrapper: - generic_args = get_args(base) - cls.dtype_cls = generic_args[0] - break - return super().__init_subclass__() - def to_dict(self) -> dict[str, JSON]: return {"name": self.name} @@ -314,6 +295,7 @@ def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScal @dataclass(frozen=True, kw_only=True) class Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): name = "bool" + dtype_cls: ClassVar[type[np.dtypes.BoolDType]] = np.dtypes.BoolDType def default_value(self) -> np.bool_: return np.False_ @@ -350,41 +332,49 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: @dataclass(frozen=True, kw_only=True) class Int8(IntWrapperBase[np.dtypes.Int8DType, np.int8]): + dtype_cls 
= np.dtypes.Int8DType name = "int8" @dataclass(frozen=True, kw_only=True) class UInt8(IntWrapperBase[np.dtypes.UInt8DType, np.uint8]): + dtype_cls = np.dtypes.UInt8DType name = "uint8" @dataclass(frozen=True, kw_only=True) class Int16(IntWrapperBase[np.dtypes.Int16DType, np.int16]): + dtype_cls = np.dtypes.Int16DType name = "int16" @dataclass(frozen=True, kw_only=True) class UInt16(IntWrapperBase[np.dtypes.UInt16DType, np.uint16]): + dtype_cls = np.dtypes.UInt16DType name = "uint16" @dataclass(frozen=True, kw_only=True) class Int32(IntWrapperBase[np.dtypes.Int32DType, np.int32]): + dtype_cls = np.dtypes.Int32DType name = "int32" @dataclass(frozen=True, kw_only=True) class UInt32(IntWrapperBase[np.dtypes.UInt32DType, np.uint32]): + dtype_cls = np.dtypes.UInt32DType name = "uint32" @dataclass(frozen=True, kw_only=True) class Int64(IntWrapperBase[np.dtypes.Int64DType, np.int64]): + dtype_cls = np.dtypes.Int64DType name = "int64" @dataclass(frozen=True, kw_only=True) class UInt64(IntWrapperBase[np.dtypes.UInt64DType, np.uint64]): + dtype_cls = np.dtypes.UInt64DType name = "uint64" @@ -407,21 +397,25 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: @dataclass(frozen=True, kw_only=True) class Float16(FloatWrapperBase[np.dtypes.Float16DType, np.float16]): + dtype_cls = np.dtypes.Float16DType name = "float16" @dataclass(frozen=True, kw_only=True) class Float32(FloatWrapperBase[np.dtypes.Float32DType, np.float32]): + dtype_cls = np.dtypes.Float32DType name = "float32" @dataclass(frozen=True, kw_only=True) class Float64(FloatWrapperBase[np.dtypes.Float64DType, np.float64]): + dtype_cls = np.dtypes.Float64DType name = "float64" @dataclass(frozen=True, kw_only=True) class Complex64(DTypeWrapper[np.dtypes.Complex64DType, np.complex64]): + dtype_cls = np.dtypes.Complex64DType name = "complex64" def default_value(self) -> np.complex64: @@ -444,6 +438,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex6 
@dataclass(frozen=True, kw_only=True) class Complex128(DTypeWrapper[np.dtypes.Complex128DType, np.complex128]): + dtype_cls = np.dtypes.Complex128DType name = "complex128" def default_value(self) -> np.complex128: @@ -480,7 +475,8 @@ def unwrap(self) -> TDType: @dataclass(frozen=True, kw_only=True) class FixedLengthAsciiString(FlexibleWrapperBase[np.dtypes.BytesDType, np.bytes_]): - name = "numpy/static_byte_string" + dtype_cls = np.dtypes.BytesDType + name = "numpy.static_byte_string" item_size_bits = 8 def default_value(self) -> np.bytes_: @@ -500,6 +496,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: @dataclass(frozen=True, kw_only=True) class StaticRawBytes(FlexibleWrapperBase[np.dtypes.VoidDType, np.void]): + dtype_cls = np.dtypes.VoidDType name = "r*" item_size_bits = 8 @@ -532,7 +529,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: @dataclass(frozen=True, kw_only=True) class FixedLengthUnicodeString(FlexibleWrapperBase[np.dtypes.StrDType, np.str_]): - name = "numpy/static_unicode_string" + dtype_cls = np.dtypes.StrDType + name = "numpy.static_unicode_string" item_size_bits = 32 # UCS4 is 32 bits per code point def default_value(self) -> np.str_: @@ -554,7 +552,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: @dataclass(frozen=True, kw_only=True) class VariableLengthString(DTypeWrapper[np.dtypes.StringDType, str]): - name = "numpy/vlen_string" + dtype_cls = np.dtypes.StringDType + name = "numpy.vlen_string" def default_value(self) -> str: return "" @@ -582,7 +581,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: @dataclass(frozen=True, kw_only=True) class VariableLengthString(DTypeWrapper[np.dtypes.ObjectDType, str]): - name = "numpy/vlen_string" + dtype_cls = np.dtypes.ObjectDType + name = "numpy.vlen_string" endianness: Endianness = field(default=None) def default_value(self) -> str: @@ -617,6 +617,7 @@ def 
from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: @dataclass(frozen=True, kw_only=True) class DateTime64(DTypeWrapper[np.dtypes.DateTime64DType, np.datetime64]): + dtype_cls = np.dtypes.DateTime64DType name = "numpy/datetime64" unit: DateUnit | TimeUnit = "s" @@ -647,6 +648,7 @@ def to_json_value(self, data: np.datetime64, *, zarr_format: ZarrFormat) -> int: @dataclass(frozen=True, kw_only=True) class Structured(DTypeWrapper[np.dtypes.VoidDType, np.void]): + dtype_cls = np.dtypes.VoidDType name = "numpy/struct" fields: tuple[tuple[str, DTypeWrapper[Any, Any], int], ...] From 381a26436872be2db0217d9bd5046c5c9d8ae082 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 9 Mar 2025 12:53:32 +0100 Subject: [PATCH 020/130] fix tests --- src/zarr/core/metadata/v2.py | 7 ++++++- tests/test_metadata/test_dtype.py | 2 +- tests/test_metadata/test_v2.py | 6 ++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index cb09a35bec..3883a998c1 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -158,7 +158,12 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: def to_dict(self) -> dict[str, JSON]: zarray_dict = super().to_dict() if isinstance(zarray_dict["compressor"], numcodecs.abc.Codec): - zarray_dict["compressor"] = zarray_dict["compressor"].get_config() + codec_config = zarray_dict["compressor"].get_config() + # Hotfix for https://github.com/zarr-developers/zarr-python/issues/2647 + if codec_config["id"] == "zstd" and not codec_config.get("checksum", False): + codec_config.pop("checksum") + zarray_dict["compressor"] = codec_config + if zarray_dict["filters"] is not None: raw_filters = zarray_dict["filters"] new_filters = [] diff --git a/tests/test_metadata/test_dtype.py b/tests/test_metadata/test_dtype.py index d0a0243a9f..8a1bcdedd1 100644 --- a/tests/test_metadata/test_dtype.py +++ b/tests/test_metadata/test_dtype.py @@ -116,7 +116,7 
@@ def test_dict_serialization(wrapper_cls: DTYPE) -> None: (Complex64(), np.complex64(0)), (Complex128(), np.complex128(0)), (FixedLengthAsciiString(length=3), np.bytes_(b"")), - (StaticRawBytes(length=3), np.void(b"")), + (StaticRawBytes(length=3), np.void(b"\x00\x00\x00")), (FixedLengthUnicodeString(length=3), np.str_("")), ( Structured(fields=(("a", Float64(), 0), ("b", Int8(), 8))), diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index 2637224f93..1c5ddd6f9a 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -19,8 +19,6 @@ from zarr.abc.codec import Codec -import numcodecs - def test_parse_zarr_format_valid() -> None: assert parse_zarr_format(2) == 2 @@ -33,8 +31,8 @@ def test_parse_zarr_format_invalid(data: Any) -> None: @pytest.mark.parametrize("attributes", [None, {"foo": "bar"}]) -@pytest.mark.parametrize("filters", [None, (numcodecs.GZip(),)]) -@pytest.mark.parametrize("compressor", [None, numcodecs.GZip()]) +@pytest.mark.parametrize("filters", [None, [{"id": "gzip", "level": 1}]]) +@pytest.mark.parametrize("compressor", [None, {"id": "gzip", "level": 1}]) @pytest.mark.parametrize("fill_value", [None, 0, 1]) @pytest.mark.parametrize("order", ["C", "F"]) @pytest.mark.parametrize("dimension_separator", [".", "/", None]) From 6a7857b15ae360825f92c0c47d2aa5863e481531 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 12 Mar 2025 10:46:28 +0100 Subject: [PATCH 021/130] refactor data types --- src/zarr/api/asynchronous.py | 2 +- src/zarr/codecs/_v2.py | 6 +- src/zarr/codecs/blosc.py | 4 +- src/zarr/codecs/bytes.py | 5 +- src/zarr/codecs/sharding.py | 4 +- src/zarr/core/_info.py | 2 +- src/zarr/core/array.py | 42 +- src/zarr/core/array_spec.py | 8 +- src/zarr/core/buffer/cpu.py | 7 +- src/zarr/core/codec_pipeline.py | 2 +- src/zarr/core/dtype/__init__.py | 115 ++++ src/zarr/core/dtype/_numpy.py | 821 +++++++++++++++++++++++ src/zarr/core/dtype/common.py | 602 +++++++++++++++++ 
src/zarr/core/dtype/registry.py | 50 ++ src/zarr/core/dtype/wrapper.py | 279 ++++++++ src/zarr/core/metadata/dtype.py | 808 ---------------------- src/zarr/core/metadata/v2.py | 21 +- src/zarr/core/metadata/v3.py | 19 +- src/zarr/registry.py | 2 +- src/zarr/testing/strategies.py | 6 +- tests/conftest.py | 4 +- tests/test_array.py | 8 +- tests/test_codecs/test_vlen.py | 2 +- tests/test_metadata/test_consolidated.py | 4 +- tests/test_metadata/test_dtype.py | 51 +- tests/test_metadata/test_v2.py | 2 +- tests/test_metadata/test_v3.py | 6 +- 27 files changed, 1962 insertions(+), 920 deletions(-) create mode 100644 src/zarr/core/dtype/__init__.py create mode 100644 src/zarr/core/dtype/_numpy.py create mode 100644 src/zarr/core/dtype/common.py create mode 100644 src/zarr/core/dtype/registry.py create mode 100644 src/zarr/core/dtype/wrapper.py diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index d882b1d7cc..d3e88ae7d3 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -28,6 +28,7 @@ _warn_order_kwarg, _warn_write_empty_chunks_kwarg, ) +from zarr.core.dtype import get_data_type_from_numpy from zarr.core.group import ( AsyncGroup, ConsolidatedMetadata, @@ -35,7 +36,6 @@ create_hierarchy, ) from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata -from zarr.core.metadata.dtype import get_data_type_from_numpy from zarr.errors import NodeTypeValidationError from zarr.storage._common import make_store_path diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py index e2f228f509..a89d1f5fa4 100644 --- a/src/zarr/codecs/_v2.py +++ b/src/zarr/codecs/_v2.py @@ -48,7 +48,7 @@ async def _decode_single( # segfaults and other bad things happening if chunk_spec.dtype != object: try: - chunk = chunk.view(chunk_spec.dtype.unwrap()) + chunk = chunk.view(chunk_spec.dtype.to_dtype()) except TypeError: # this will happen if the dtype of the chunk # does not match the dtype of the array spec i.g. 
if @@ -56,7 +56,7 @@ async def _decode_single( # is an object array. In this case, we need to convert the object # array to the correct dtype. - chunk = np.array(chunk).astype(chunk_spec.dtype.unwrap()) + chunk = np.array(chunk).astype(chunk_spec.dtype.to_dtype()) elif chunk.dtype != object: # If we end up here, someone must have hacked around with the filters. @@ -80,7 +80,7 @@ async def _encode_single( chunk = chunk_array.as_ndarray_like() # ensure contiguous and correct order - chunk = chunk.astype(chunk_spec.dtype.unwrap(), order=chunk_spec.order, copy=False) + chunk = chunk.astype(chunk_spec.dtype.to_dtype(), order=chunk_spec.order, copy=False) # apply filters if self.filters: diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index d7cd1f0113..79be926ad8 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -139,13 +139,13 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: dtype = array_spec.dtype new_codec = self if new_codec.typesize is None: - new_codec = replace(new_codec, typesize=dtype.unwrap().itemsize) + new_codec = replace(new_codec, typesize=dtype.to_dtype().itemsize) if new_codec.shuffle is None: new_codec = replace( new_codec, shuffle=( BloscShuffle.bitshuffle - if dtype.unwrap().itemsize == 1 + if dtype.to_dtype().itemsize == 1 else BloscShuffle.shuffle ), ) diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 9a5a217abf..e7b57ab9b3 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -10,6 +10,7 @@ from zarr.abc.codec import ArrayBytesCodec from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer from zarr.core.common import JSON, parse_enum, parse_named_configuration +from zarr.core.dtype.common import endianness_to_numpy_str from zarr.registry import register_codec if TYPE_CHECKING: @@ -56,7 +57,7 @@ def to_dict(self) -> dict[str, JSON]: return {"name": "bytes", "configuration": {"endian": self.endian.value}} def evolve_from_array_spec(self, array_spec: 
ArraySpec) -> Self: - if array_spec.dtype.unwrap().itemsize == 1: + if array_spec.dtype.to_dtype().itemsize == 1: if self.endian is not None: return replace(self, endian=None) elif self.endian is None: @@ -73,7 +74,7 @@ async def _decode_single( assert isinstance(chunk_bytes, Buffer) # TODO: remove endianness enum in favor of literal union endian_str = self.endian.value if self.endian is not None else None - dtype = chunk_spec.dtype.with_endianness(endian_str).unwrap() + dtype = chunk_spec.dtype.to_dtype().newbyteorder(endianness_to_numpy_str(endian_str)) as_array_like = chunk_bytes.as_array_like() if isinstance(as_array_like, NDArrayLike): diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 7163a5fd7f..c501346980 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -50,7 +50,6 @@ get_indexer, morton_order_iter, ) -from zarr.core.metadata.dtype import DTypeWrapper from zarr.core.metadata.v3 import parse_codecs from zarr.registry import get_ndbuffer_class, get_pipeline_class, register_codec @@ -59,6 +58,7 @@ from typing import Self from zarr.core.common import JSON + from zarr.core.dtype.wrapper import DTypeWrapper MAX_UINT_64 = 2**64 - 1 ShardMapping = Mapping[ChunkCoords, Buffer] @@ -488,7 +488,7 @@ async def _decode_partial_single( # setup output array out = shard_spec.prototype.nd_buffer.create( shape=indexer.shape, - dtype=shard_spec.dtype.unwrap(), + dtype=shard_spec.dtype.to_dtype(), order=shard_spec.order, fill_value=0, ) diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index 6b594583e2..a632b8c602 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -7,7 +7,7 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.core.common import ZarrFormat -from zarr.core.metadata.dtype import DTypeWrapper +from zarr.core.dtype.wrapper import DTypeWrapper # from zarr.core.metadata.v3 import DataType diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py 
index 7718aa505f..a060bcbfae 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -67,6 +67,13 @@ product, ) from zarr.core.config import config as zarr_config +from zarr.core.dtype import ( + DTypeWrapper, + FixedLengthAsciiString, + FixedLengthUnicodeString, + VariableLengthString, + parse_data_type, +) from zarr.core.indexing import ( BasicIndexer, BasicSelection, @@ -101,13 +108,6 @@ ArrayV3MetadataDict, T_ArrayMetadata, ) -from zarr.core.metadata.dtype import ( - DTypeWrapper, - FixedLengthAsciiString, - FixedLengthUnicodeString, - VariableLengthString, - get_data_type_from_numpy, -) from zarr.core.metadata.v2 import ( parse_compressor, parse_filters, @@ -555,7 +555,7 @@ async def _create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike[Any] | DTypeWrapper[Any, Any], + dtype: npt.DTypeLike | DTypeWrapper[Any, Any], zarr_format: ZarrFormat = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -584,11 +584,8 @@ async def _create( See :func:`AsyncArray.create` for more details. Deprecated in favor of :func:`zarr.api.asynchronous.create_array`. 
""" - # TODO: delete this and be more strict about where parsing occurs - if not isinstance(dtype, DTypeWrapper): - dtype_parsed = get_data_type_from_numpy(np.dtype(dtype)) - else: - dtype_parsed = dtype + + dtype_parsed = parse_data_type(dtype) store_path = await make_store_path(store) shape = parse_shapelike(shape) @@ -597,9 +594,9 @@ async def _create( raise ValueError("Only one of chunk_shape or chunks can be provided.") if chunks: - _chunks = normalize_chunks(chunks, shape, dtype_parsed.unwrap().itemsize) + _chunks = normalize_chunks(chunks, shape, dtype_parsed.to_dtype().itemsize) else: - _chunks = normalize_chunks(chunk_shape, shape, dtype_parsed.unwrap().itemsize) + _chunks = normalize_chunks(chunk_shape, shape, dtype_parsed.to_dtype().itemsize) config_parsed = parse_array_config(config) result: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] @@ -701,7 +698,7 @@ def _create_metadata_v3( else: chunk_key_encoding_parsed = chunk_key_encoding - if dtype.unwrap().kind in ("U", "T", "S"): + if dtype.to_dtype().kind in ("U", "T", "S"): warn( f"The dtype `{dtype}` is currently not part in the Zarr format 3 specification. 
It " "may not be supported by other zarr implementations and may change in the future.", @@ -1053,9 +1050,9 @@ def dtype(self) -> np.dtype[Any]: Data type of the array """ if self.metadata.zarr_format == 2: - return self.metadata.dtype.unwrap() + return self.metadata.dtype.to_dtype() else: - return self.metadata.data_type.unwrap() + return self.metadata.data_type.to_dtype() @property def order(self) -> MemoryOrder: @@ -3930,10 +3927,7 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation - if not isinstance(dtype, DTypeWrapper): - dtype_wrapped = get_data_type_from_numpy(dtype) - else: - dtype_wrapped = dtype + dtype_wrapped = parse_data_type(dtype) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format @@ -3951,7 +3945,7 @@ async def init_array( array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, - item_size=dtype_wrapped.unwrap().itemsize, + item_size=dtype_wrapped.to_dtype().itemsize, ) chunks_out: tuple[int, ...] 
meta: ArrayV2Metadata | ArrayV3Metadata @@ -4241,7 +4235,7 @@ def _get_default_chunk_encoding_v3( elif isinstance(dtype, FixedLengthAsciiString): serializer = VLenBytesCodec() else: - if dtype.unwrap().itemsize == 1: + if dtype.to_dtype().itemsize == 1: serializer = BytesCodec(endian=None) else: serializer = BytesCodec() diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index f5a060cf95..f297fafa24 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -11,7 +11,7 @@ parse_shapelike, ) from zarr.core.config import config as zarr_config -from zarr.core.metadata.dtype import DTypeWrapper, get_data_type_from_numpy +from zarr.core.dtype import parse_data_type if TYPE_CHECKING: from typing import NotRequired @@ -20,6 +20,7 @@ from zarr.core.buffer import BufferPrototype from zarr.core.common import ChunkCoords + from zarr.core.dtype.wrapper import DTypeWrapper class ArrayConfigParams(TypedDict): @@ -105,10 +106,7 @@ def __init__( prototype: BufferPrototype, ) -> None: shape_parsed = parse_shapelike(shape) - if not isinstance(dtype, DTypeWrapper): - dtype_parsed = get_data_type_from_numpy(dtype) - else: - dtype_parsed = dtype + dtype_parsed = parse_data_type(dtype) fill_value_parsed = parse_fill_value(fill_value) diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 00444a6f76..9894fced51 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -10,7 +10,6 @@ import numpy.typing as npt from zarr.core.buffer import core -from zarr.core.metadata.dtype import DTypeWrapper from zarr.registry import ( register_buffer, register_ndbuffer, @@ -158,11 +157,7 @@ def create( if fill_value is None: return cls(np.zeros(shape=tuple(shape), dtype=dtype, order=order)) else: - return cls( - np.full( - shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order - ) - ) + return cls(np.full(shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order)) @classmethod def 
from_numpy_array(cls, array_like: npt.ArrayLike) -> Self: diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index 5ee4f03799..222e97ce74 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -316,7 +316,7 @@ def _merge_chunk_array( if existing_chunk_array is None: chunk_array = chunk_spec.prototype.nd_buffer.create( shape=chunk_spec.shape, - dtype=chunk_spec.dtype.unwrap(), + dtype=chunk_spec.dtype.to_dtype(), order=chunk_spec.order, fill_value=fill_value_or_default(chunk_spec), ) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py new file mode 100644 index 0000000000..432eabf2ce --- /dev/null +++ b/src/zarr/core/dtype/__init__.py @@ -0,0 +1,115 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, get_args + +import numpy as np + +from zarr.core.dtype.common import _NUMPY_SUPPORTS_VLEN_STRING + +if TYPE_CHECKING: + import numpy.typing as npt + + from zarr.core.common import JSON + +from zarr.core.dtype._numpy import ( + Bool, + Complex64, + Complex128, + DateTime64, + FixedLengthAsciiString, + FixedLengthBytes, + FixedLengthUnicodeString, + Float16, + Float32, + Float64, + Int8, + Int16, + Int32, + Int64, + Structured, + UInt8, + UInt16, + UInt32, + UInt64, + VariableLengthString, +) +from zarr.core.dtype.registry import DataTypeRegistry +from zarr.core.dtype.wrapper import DTypeWrapper + +__all__ = [ + "Complex64", + "Complex128", + "DTypeWrapper", + "DateTime64", + "FixedLengthAsciiString", + "FixedLengthBytes", + "FixedLengthUnicodeString", + "Float16", + "Float32", + "Float64", + "Int8", + "Int16", + "Int32", + "Int64", + "Structured", + "UInt8", + "UInt16", + "UInt32", + "UInt64", + "VariableLengthString", + "data_type_registry", + "parse_data_type", +] + +data_type_registry = DataTypeRegistry() + +INTEGER_DTYPE = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 +FLOAT_DTYPE = Float16 | Float32 | Float64 +COMPLEX_DTYPE = Complex64 
| Complex128 +STRING_DTYPE = FixedLengthUnicodeString | VariableLengthString | FixedLengthAsciiString +DTYPE = ( + Bool + | INTEGER_DTYPE + | FLOAT_DTYPE + | COMPLEX_DTYPE + | STRING_DTYPE + | FixedLengthBytes + | Structured + | DateTime64 +) + +for dtype in get_args(DTYPE): + data_type_registry.register(dtype._zarr_v3_name, dtype) + + +def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper[Any, Any]: + data_type_registry.lazy_load() + if not isinstance(dtype, np.dtype): + if dtype in (str, "str"): + if _NUMPY_SUPPORTS_VLEN_STRING: + np_dtype = np.dtype("T") + else: + np_dtype = np.dtype("O") + elif isinstance(dtype, list): + # this is a valid _VoidDTypeLike check + np_dtype = np.dtype([tuple(d) for d in dtype]) + else: + np_dtype = np.dtype(dtype) + else: + np_dtype = dtype + return data_type_registry.match_dtype(np_dtype) + + +def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeWrapper[Any, Any]: + return data_type_registry.match_json(dtype) + + +def parse_data_type( + dtype: npt.DTypeLike | DTypeWrapper[Any, Any] | dict[str, JSON], +) -> DTypeWrapper[Any, Any]: + if isinstance(dtype, DTypeWrapper): + return dtype + elif isinstance(dtype, dict): + return get_data_type_from_dict(dtype) + else: + return get_data_type_from_numpy(dtype) diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py new file mode 100644 index 0000000000..b98cc100e3 --- /dev/null +++ b/src/zarr/core/dtype/_numpy.py @@ -0,0 +1,821 @@ +from __future__ import annotations + +import base64 +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, ClassVar, Literal, Self, TypeGuard, cast, get_args + +import numpy as np + +from zarr.core.dtype.common import ( + _NUMPY_SUPPORTS_VLEN_STRING, + DataTypeValidationError, + Endianness, + JSONFloat, + bytes_from_json, + bytes_to_json, + check_json_bool, + check_json_complex_float, + check_json_complex_float_v3, + check_json_float_v2, + check_json_int, + check_json_str, + complex_from_json, 
+ complex_to_json, + datetime_from_json, + datetime_to_json, + endianness_from_numpy_str, + endianness_to_numpy_str, + float_from_json, + float_to_json, +) +from zarr.core.dtype.wrapper import DTypeWrapper, TDType + +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + + +@dataclass(frozen=True, kw_only=True) +class Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): + """ + Wrapper for numpy boolean dtype. + + Attributes + ---------- + name : str + The name of the dtype. + dtype_cls : ClassVar[type[np.dtypes.BoolDType]] + The numpy dtype class. + """ + + _zarr_v3_name = "bool" + dtype_cls: ClassVar[type[np.dtypes.BoolDType]] = np.dtypes.BoolDType + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.bool_: + """ + Get the default value for the boolean dtype. + + Returns + ------- + np.bool_ + The default value. + """ + return np.False_ + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.BoolDType) -> Self: + """ + Wrap a numpy boolean dtype without checking. + + Parameters + ---------- + dtype : np.dtypes.BoolDType + The numpy dtype to wrap. + + Returns + ------- + Self + The wrapped dtype. + """ + return cls() + + def to_dtype(self) -> np.dtypes.BoolDType: + return self.dtype_cls() + + def to_json_value(self, data: np.bool_, zarr_format: ZarrFormat) -> bool: + """ + Convert a boolean value to JSON-serializable format. + + Parameters + ---------- + data : object + The value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + bool + The JSON-serializable format. + """ + return bool(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: + """ + Read a JSON-serializable value as a numpy boolean scalar. + + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + np.bool_ + The numpy boolean scalar. 
+ """ + if check_json_bool(data): + return self.cast_value(data) + raise TypeError(f"Invalid type: {data}. Expected a boolean.") + + +@dataclass(frozen=True, kw_only=True) +class Int8(DTypeWrapper[np.dtypes.Int8DType, np.int8]): + dtype_cls = np.dtypes.Int8DType + _zarr_v3_name = "int8" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.Int8DType) -> Self: + return cls() + + def to_dtype(self) -> np.dtypes.Int8DType: + return self.dtype_cls() + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.int8: + return self.to_dtype().type(0) + + def to_json_value(self, data: np.int8, zarr_format: ZarrFormat) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int8: + if check_json_int(data): + return self.cast_value(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + + +@dataclass(frozen=True, kw_only=True) +class UInt8(DTypeWrapper[np.dtypes.UInt8DType, np.uint8]): + dtype_cls = np.dtypes.UInt8DType + _zarr_v3_name = "uint8" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.UInt8DType) -> Self: + return cls() + + def to_dtype(self) -> np.dtypes.UInt8DType: + return self.dtype_cls() + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.uint8: + return self.to_dtype().type(0) + + def to_json_value(self, data: np.uint8, zarr_format: ZarrFormat) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint8: + if check_json_int(data): + return self.cast_value(data) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + + +@dataclass(frozen=True, kw_only=True) +class Int16(DTypeWrapper[np.dtypes.Int16DType, np.int16]): + dtype_cls = np.dtypes.Int16DType + _zarr_v3_name = "int16" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.Int16DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.Int16DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.int16: + return self.cast_value(0) + + def to_json_value(self, data: np.int16, zarr_format: ZarrFormat) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int16: + if check_json_int(data): + return self.cast_value(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + + +@dataclass(frozen=True, kw_only=True) +class UInt16(DTypeWrapper[np.dtypes.UInt16DType, np.uint16]): + dtype_cls = np.dtypes.UInt16DType + _zarr_v3_name = "uint16" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.UInt16DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.UInt16DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.uint16: + return self.cast_value(0) + + def to_json_value(self, data: np.uint16, zarr_format: ZarrFormat) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint16: + if check_json_int(data): + return self.cast_value(data) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + + +@dataclass(frozen=True, kw_only=True) +class Int32(DTypeWrapper[np.dtypes.Int32DType, np.int32]): + dtype_cls = np.dtypes.Int32DType + _zarr_v3_name = "int32" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.Int32DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.Int32DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.int32: + return self.cast_value(0) + + def to_json_value(self, data: np.int32, zarr_format: ZarrFormat) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int32: + if check_json_int(data): + return self.cast_value(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + + +@dataclass(frozen=True, kw_only=True) +class UInt32(DTypeWrapper[np.dtypes.UInt32DType, np.uint32]): + dtype_cls = np.dtypes.UInt32DType + _zarr_v3_name = "uint32" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.UInt32DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.UInt32DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.uint32: + return self.cast_value(0) + + def to_json_value(self, data: np.uint32, zarr_format: ZarrFormat) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint32: + if check_json_int(data): + return self.cast_value(data) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + + +@dataclass(frozen=True, kw_only=True) +class Int64(DTypeWrapper[np.dtypes.Int64DType, np.int64]): + dtype_cls = np.dtypes.Int64DType + _zarr_v3_name = "int64" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.Int64DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.Int64DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.int64: + return self.cast_value(0) + + def to_json_value(self, data: np.int64, zarr_format: ZarrFormat) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int64: + if check_json_int(data): + return self.cast_value(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + + +@dataclass(frozen=True, kw_only=True) +class UInt64(DTypeWrapper[np.dtypes.UInt64DType, np.uint64]): + dtype_cls = np.dtypes.UInt64DType + _zarr_v3_name = "uint64" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.UInt64DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.UInt64DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.uint64: + return self.cast_value(0) + + def to_json_value(self, data: np.uint64, zarr_format: ZarrFormat) -> int: + return int(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint64: + if check_json_int(data): + return self.cast_value(data) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + + +@dataclass(frozen=True, kw_only=True) +class Float16(DTypeWrapper[np.dtypes.Float16DType, np.float16]): + dtype_cls = np.dtypes.Float16DType + _zarr_v3_name = "float16" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.Float16DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.Float16DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.float16: + return self.to_dtype().type(0.0) + + def to_json_value(self, data: np.float16, zarr_format: ZarrFormat) -> JSONFloat: + return float_to_json(data, zarr_format) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float16: + if check_json_float_v2(data): + return self.to_dtype().type(float_from_json(data, zarr_format)) + raise TypeError(f"Invalid type: {data}. 
Expected a float.") + + +@dataclass(frozen=True, kw_only=True) +class Float32(DTypeWrapper[np.dtypes.Float32DType, np.float32]): + dtype_cls = np.dtypes.Float32DType + _zarr_v3_name = "float32" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.Float32DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.Float32DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def cast_value(self, value: object) -> np.float32: + return self.to_dtype().type(value) + + def default_value(self) -> np.float32: + return self.to_dtype().type(0.0) + + def to_json_value(self, data: np.float32, zarr_format: ZarrFormat) -> JSONFloat: + return float_to_json(data, zarr_format) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float32: + if check_json_float_v2(data): + return self.to_dtype().type(float_from_json(data, zarr_format)) + raise TypeError(f"Invalid type: {data}. 
Expected a float.") + + +@dataclass(frozen=True, kw_only=True) +class Float64(DTypeWrapper[np.dtypes.Float64DType, np.float64]): + dtype_cls = np.dtypes.Float64DType + _zarr_v3_name = "float64" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.Float64DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.Float64DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.float64: + return self.to_dtype().type(0.0) + + def to_json_value(self, data: np.float64, zarr_format: ZarrFormat) -> JSONFloat: + return float_to_json(data, zarr_format) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float64: + if check_json_float_v2(data): + return self.to_dtype().type(float_from_json(data, zarr_format)) + raise TypeError(f"Invalid type: {data}. 
Expected a float.") + + +@dataclass(frozen=True, kw_only=True) +class Complex64(DTypeWrapper[np.dtypes.Complex64DType, np.complex64]): + dtype_cls = np.dtypes.Complex64DType + _zarr_v3_name = "complex64" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.Complex64DType) -> Self: + return cls() + + def to_dtype(self) -> np.dtypes.Complex64DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.complex64: + return np.complex64(0.0) + + def to_json_value( + self, data: np.complex64, zarr_format: ZarrFormat + ) -> tuple[JSONFloat, JSONFloat]: + return complex_to_json(data, zarr_format) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex64: + if check_json_complex_float(data, zarr_format=zarr_format): + return complex_from_json(data, dtype=self.to_dtype(), zarr_format=zarr_format) + raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") + + +@dataclass(frozen=True, kw_only=True) +class Complex128(DTypeWrapper[np.dtypes.Complex128DType, np.complex128]): + dtype_cls = np.dtypes.Complex128DType + _zarr_v3_name = "complex128" + endianness: Endianness | None = "native" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.Complex128DType) -> Self: + return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + + def to_dtype(self) -> np.dtypes.Complex128DType: + return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def default_value(self) -> np.complex128: + return np.complex128(0.0) + + def to_json_value( + self, data: np.complex128, zarr_format: ZarrFormat + ) -> tuple[JSONFloat, JSONFloat]: + return complex_to_json(data, zarr_format) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex128: + if check_json_complex_float_v3(data): + return complex_from_json(data, dtype=self.to_dtype(), zarr_format=zarr_format) + raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") + + +@dataclass(frozen=True, kw_only=True) +class FixedLengthAsciiString(DTypeWrapper[np.dtypes.BytesDType[Any], np.bytes_]): + dtype_cls = np.dtypes.BytesDType + _zarr_v3_name = "numpy.static_byte_string" + item_size_bits: ClassVar[int] = 8 + length: int = 1 + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.BytesDType) -> Self: + return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) + + def to_dtype(self) -> np.dtypes.BytesDType: + return self.dtype_cls(self.length) + + def default_value(self) -> np.bytes_: + return np.bytes_(b"") + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3), "configuration": {"length": self.length}} + + def to_json_value(self, data: np.bytes_, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(data).decode("ascii") + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: + if check_json_str(data): + return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii"))) + raise TypeError(f"Invalid type: {data}. 
Expected a string.") + + +@dataclass(frozen=True, kw_only=True) +class FixedLengthBytes(DTypeWrapper[np.dtypes.VoidDType[Any], np.void]): + dtype_cls = np.dtypes.VoidDType[Any] + _zarr_v3_name = "r*" + item_size_bits: ClassVar[int] = 8 + length: int = 1 + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType[Any]) -> Self: + return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) + + def default_value(self) -> np.void: + return self.cast_value(("\x00" * self.length).encode("ascii")) + + def to_dtype(self) -> np.dtypes.VoidDType[Any]: + # Numpy does not allow creating a void type + # by invoking np.dtypes.VoidDType directly + return np.dtype(f"V{self.length}") + + def get_name(self, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return super().get_name(zarr_format=zarr_format) + # note that we don't return self._zarr_v3_name + # because the name is parametrized by the length + return f"r{self.length * self.item_size_bits}" + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + @classmethod + def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[np.dtypes.VoidDType[Any]]: + """ + Reject structured dtypes by ensuring that dtype.fields is None + + Parameters + ---------- + dtype : TDType + The dtype to check. + + Returns + ------- + Bool + True if the dtype matches, False otherwise. 
+ """ + return super().check_dtype(dtype) and dtype.fields is None + + @classmethod + def check_json(cls, data: dict[str, JSON]) -> TypeGuard[dict[str, JSON]]: + # Overriding the base class implementation because the r* dtype + # does not have a name that will can appear in array metadata + # Instead, array metadata will contain names like "r8", "r16", etc + return ( + isinstance(data, dict) + and "name" in data + and isinstance(data["name"], str) + and re.match(r"^r\d+$", data["name"]) + ) + + def to_json_value(self, data: np.void, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(data.tobytes()).decode("ascii") + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: + if check_json_str(data): + return self.to_dtype().type(base64.standard_b64decode(data)) + raise DataTypeValidationError(f"Invalid type: {data}. Expected a string.") + + +@dataclass(frozen=True, kw_only=True) +class FixedLengthUnicodeString(DTypeWrapper[np.dtypes.StrDType[int], np.str_]): + dtype_cls = np.dtypes.StrDType[int] + _zarr_v3_name = "numpy.static_unicode_string" + item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point + endianness: Endianness | None = "native" + length: int = 1 + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.StrDType[int]) -> Self: + return cls( + length=dtype.itemsize // (cls.item_size_bits // 8), + endianness=endianness_from_numpy_str(dtype.byteorder), + ) + + def to_dtype(self) -> np.dtypes.StrDType[int]: + return self.dtype_cls(self.length).newbyteorder(endianness_to_numpy_str(self.endianness)) + + def default_value(self) -> np.str_: + return np.str_("") + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3), "configuration": {"length": self.length}} + + def to_json_value(self, data: np.str_, *, zarr_format: ZarrFormat) -> str: + return str(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: + if not check_json_str(data): + 
raise TypeError(f"Invalid type: {data}. Expected a string.") + return self.cast_value(data) + + +if _NUMPY_SUPPORTS_VLEN_STRING: + + @dataclass(frozen=True, kw_only=True) + class VariableLengthString(DTypeWrapper[np.dtypes.StringDType, str]): + dtype_cls = np.dtypes.StringDType + _zarr_v3_name = "numpy.vlen_string" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.StringDType) -> Self: + return cls() + + def default_value(self) -> str: + return "" + + def cast_value(self, value: object) -> str: + return str(value) + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def to_dtype(self) -> np.dtypes.StringDType: + return self.dtype_cls() + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return str(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. Expected a string.") + return self.cast_value(data) + +else: + + @dataclass(frozen=True, kw_only=True) + class VariableLengthString(DTypeWrapper[np.dtypes.ObjectDType, str]): + dtype_cls = np.dtypes.ObjectDType + _zarr_v3_name = "numpy.vlen_string" + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.ObjectDType) -> Self: + return cls() + + def to_dtype(self) -> np.dtypes.ObjectDType: + return self.dtype_cls() + + def cast_value(self, value: object) -> str: + return str(value) + + def default_value(self) -> str: + return "" + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3)} + + def to_json_value(self, data: str, *, zarr_format: ZarrFormat) -> str: + return data + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + """ + String literals pass through + """ + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. 
Expected a string.") + return data + + +DateUnit = Literal["Y", "M", "W", "D"] +TimeUnit = Literal["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] + + +@dataclass(frozen=True, kw_only=True) +class DateTime64(DTypeWrapper[np.dtypes.DateTime64DType, np.datetime64]): + dtype_cls = np.dtypes.DateTime64DType + _zarr_v3_name = "numpy.datetime64" + unit: DateUnit | TimeUnit = "s" + endianness: Endianness = "native" + + def default_value(self) -> np.datetime64: + return np.datetime64("NaT") + + def to_dict(self) -> dict[str, JSON]: + return {"name": self.get_name(zarr_format=3), "configuration": {"unit": self.unit}} + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: + unit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] + if unit not in get_args(DateUnit | TimeUnit): + raise DataTypeValidationError('Invalid unit for "numpy.datetime64"') + return cls(unit=unit, endianness=endianness_from_numpy_str(dtype.byteorder)) + + def cast_value(self, value: object) -> np.datetime64: + return self.to_dtype().type(value, self.unit) + + def to_dtype(self) -> np.dtypes.DateTime64DType: + # Numpy does not allow creating datetime64 via + # np.dtypes.DateTime64Dtype() + return np.dtype(f"datetime64[{self.unit}]").newbyteorder( + endianness_to_numpy_str(self.endianness) + ) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: + if check_json_int(data): + return datetime_from_json(data, self.unit) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + + def to_json_value(self, data: np.datetime64, *, zarr_format: ZarrFormat) -> int: + return datetime_to_json(data) + + +@dataclass(frozen=True, kw_only=True) +class Structured(DTypeWrapper[np.dtypes.VoidDType, np.void]): + dtype_cls = np.dtypes.VoidDType + _zarr_v3_name = "numpy.structured" + fields: tuple[tuple[str, DTypeWrapper[Any, Any]], ...] 
+ + def default_value(self) -> np.void: + return self.cast_value(0) + + def cast_value(self, value: object) -> np.void: + return np.array([value], dtype=self.to_dtype())[0] + + @classmethod + def check_dtype(cls, dtype: np.dtypes.DTypeLike) -> TypeGuard[np.dtypes.VoidDType]: + """ + Check that this dtype is a numpy structured dtype + + Parameters + ---------- + dtype : np.dtypes.DTypeLike + The dtype to check. + + Returns + ------- + TypeGuard[np.dtypes.VoidDType] + True if the dtype matches, False otherwise. + """ + return super().check_dtype(dtype) and dtype.fields is not None + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType) -> Self: + from zarr.core.dtype import get_data_type_from_numpy + + fields: list[tuple[str, DTypeWrapper[Any, Any]]] = [] + + if dtype.fields is None: + raise ValueError("numpy dtype has no fields") + + for key, (dtype_instance, _) in dtype.fields.items(): + dtype_wrapped = get_data_type_from_numpy(dtype_instance) + fields.append((key, dtype_wrapped)) + + return cls(fields=tuple(fields)) + + def get_name(self, zarr_format: ZarrFormat) -> str | list[tuple[str, str]]: + if zarr_format == 2: + return [[k, d.get_name(zarr_format=2)] for k, d in self.fields] + return self._zarr_v3_name + + def to_dict(self) -> dict[str, JSON]: + base_dict = {"name": self.get_name(zarr_format=3)} + field_configs = [(f_name, f_dtype.to_dict()) for f_name, f_dtype in self.fields] + base_dict["configuration"] = {"fields": field_configs} + return base_dict + + @classmethod + def check_json(cls, data: JSON) -> bool: + return ( + isinstance(data, dict) + and "name" in data + and "configuration" in data + and "fields" in data["configuration"] + ) + + @classmethod + def from_dict(cls, data: dict[str, JSON]) -> Self: + if cls.check_json(data): + from zarr.core.dtype import get_data_type_from_dict + + fields = tuple( + (f_name, get_data_type_from_dict(f_dtype)) + for f_name, f_dtype in data["configuration"]["fields"] + ) + return cls(fields=fields) 
+ raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") + + def to_dtype(self) -> np.dtypes.VoidDType: + return cast(np.void, np.dtype([(key, dtype.to_dtype()) for (key, dtype) in self.fields])) + + def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: + return bytes_to_json(data.tobytes(), zarr_format) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. Expected a string.") + as_bytes = bytes_from_json(data, zarr_format=zarr_format) + dtype = self.to_dtype() + return cast(np.void, np.array([as_bytes], dtype=dtype.str).view(dtype)[0]) diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py new file mode 100644 index 0000000000..1dbf22c3c2 --- /dev/null +++ b/src/zarr/core/dtype/common.py @@ -0,0 +1,602 @@ +from __future__ import annotations + +import base64 +from collections.abc import Sequence +from typing import TYPE_CHECKING, Any, Literal, TypeGuard, cast, get_args + +import numpy as np + +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + from zarr.core.dtype._numpy import DateUnit, TimeUnit + +Endianness = Literal["little", "big", "native"] +EndiannessNumpy = Literal[">", "<", "=", "|"] +JSONFloat = float | Literal["NaN", "Infinity", "-Infinity"] + +_NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") + + +class DataTypeValidationError(ValueError): ... + + +def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: + """ + Convert an endianness literal to its numpy string representation. + + Parameters + ---------- + endianness : Endianness or None + The endianness to convert. + + Returns + ------- + Literal[">", "<", "=", "|"] + The numpy string representation of the endianness. + + Raises + ------ + ValueError + If the endianness is invalid. 
+ """ + match endianness: + case "little": + return "<" + case "big": + return ">" + case "native": + return "=" + case None: + return "|" + raise ValueError( + f"Invalid endianness: {endianness}. Expected one of {get_args(Endianness)} or None" + ) + + +def endianness_from_numpy_str(endianness: EndiannessNumpy) -> Endianness | None: + """ + Convert a numpy endianness string literal to a human-readable literal value. + + Parameters + ---------- + endianness : Literal[">", "<", "=", "|"] + The numpy string representation of the endianness. + + Returns + ------- + Endianness or None + The human-readable representation of the endianness. + + Raises + ------ + ValueError + If the endianness is invalid. + """ + match endianness: + case "<": + return "little" + case ">": + return "big" + case "=": + return "native" + case "|": + return None + raise ValueError( + f"Invalid endianness: {endianness}. Expected one of {get_args(EndiannessNumpy)}" + ) + + +def check_json_bool(data: JSON) -> TypeGuard[bool]: + """ + Check if a JSON value is a boolean. + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a boolean, False otherwise. + """ + return bool(isinstance(data, bool)) + + +def check_json_str(data: JSON) -> TypeGuard[str]: + """ + Check if a JSON value is a string. + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a string, False otherwise. + """ + return bool(isinstance(data, str)) + + +def check_json_int(data: JSON) -> TypeGuard[int]: + """ + Check if a JSON value is an integer. + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is an integer, False otherwise. + """ + return bool(isinstance(data, int)) + + +def check_json_float_v2(data: JSON) -> TypeGuard[JSONFloat]: + """ + Check if a JSON value represents a float (v2). 
+ + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a float, False otherwise. + """ + if data == "NaN" or data == "Infinity" or data == "-Infinity": + return True + return isinstance(data, float | int) + + +def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloat]: + """ + Check if a JSON value represents a float (v3). + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a float, False otherwise. + """ + # TODO: handle the special JSON serialization of different NaN values + return check_json_float_v2(data) + + +def check_json_float(data: JSON, zarr_format: ZarrFormat) -> TypeGuard[float]: + """ + Check if a JSON value represents a float based on zarr format. + + Parameters + ---------- + data : JSON + The JSON value to check. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + Bool + True if the data is a float, False otherwise. + """ + if zarr_format == 2: + return check_json_float_v2(data) + else: + return check_json_float_v3(data) + + +def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + """ + Check if a JSON value represents a complex float, as per the zarr v3 spec + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a complex float, False otherwise. + """ + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and len(data) == 2 + and check_json_float_v3(data[0]) + and check_json_float_v3(data[1]) + ) + + +def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + """ + Check if a JSON value represents a complex float, as per the behavior of zarr-python 2.x + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a complex float, False otherwise. 
+ """ + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and len(data) == 2 + and check_json_float_v2(data[0]) + and check_json_float_v2(data[1]) + ) + + +def check_json_complex_float( + data: JSON, zarr_format: ZarrFormat +) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + """ + Check if a JSON value represents a complex float based on zarr format. + + Parameters + ---------- + data : JSON + The JSON value to check. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + Bool + True if the data represents a complex float, False otherwise. + """ + if zarr_format == 2: + return check_json_complex_float_v2(data) + return check_json_complex_float_v3(data) + + +def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: + """ + Convert a float to JSON (v2). + + Parameters + ---------- + data : float or np.floating + The float value to convert. + + Returns + ------- + JSONFloat + The JSON representation of the float. + """ + if np.isnan(data): + return "NaN" + elif np.isinf(data): + return "Infinity" if data > 0 else "-Infinity" + return float(data) + + +def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: + """ + Convert a float to JSON (v3). + + Parameters + ---------- + data : float or np.floating + The float value to convert. + + Returns + ------- + JSONFloat + The JSON representation of the float. + """ + # v3 can in principle handle distinct NaN values, but numpy does not represent these explicitly + # so we just reuse the v2 routine here + return float_to_json_v2(data) + + +def float_to_json(data: float | np.floating[Any], zarr_format: ZarrFormat) -> JSONFloat: + """ + Convert a float to JSON, parametrized by the zarr format version. + + Parameters + ---------- + data : float or np.floating + The float value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + JSONFloat + The JSON representation of the float. 
+ """ + if zarr_format == 2: + return float_to_json_v2(data) + else: + return float_to_json_v3(data) + raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + + +def complex_to_json_v2(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: + """ + Convert a complex number to JSON (v2). + + Parameters + ---------- + data : complex or np.complexfloating + The complex value to convert. + + Returns + ------- + tuple[JSONFloat, JSONFloat] + The JSON representation of the complex number. + """ + return float_to_json_v2(data.real), float_to_json_v2(data.imag) + + +def complex_to_json_v3(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: + """ + Convert a complex number to JSON (v3). + + Parameters + ---------- + data : complex or np.complexfloating + The complex value to convert. + + Returns + ------- + tuple[JSONFloat, JSONFloat] + The JSON representation of the complex number. + """ + return float_to_json_v3(data.real), float_to_json_v3(data.imag) + + +def complex_to_json( + data: complex | np.complexfloating[Any, Any], zarr_format: ZarrFormat +) -> tuple[JSONFloat, JSONFloat]: + """ + Convert a complex number to JSON, parametrized by the zarr format version. + + Parameters + ---------- + data : complex or np.complexfloating + The complex value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + tuple[JSONFloat, JSONFloat] or JSONFloat + The JSON representation of the complex number. + """ + if zarr_format == 2: + return complex_to_json_v2(data) + else: + return complex_to_json_v3(data) + raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + + +def bytes_to_json(data: bytes, zarr_format: ZarrFormat) -> str: + """ + Convert bytes to JSON. + + Parameters + ---------- + data : bytes + The structured scalar value to convert. + zarr_format : ZarrFormat + The zarr format version. 
+ + Returns + ------- + str + The bytes encoded as ascii using the base64 alphabet. + """ + if zarr_format == 2: + return base64.b64encode(data).decode("ascii") + raise NotImplementedError(f"Invalid zarr format: {zarr_format}. Expected 2.") + + +def bytes_from_json(data: str, zarr_format: ZarrFormat) -> bytes: + """ + Convert a JSON string to bytes + + Parameters + ---------- + data : str + The JSON string to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + bytes + The bytes. + """ + if zarr_format == 2: + return base64.b64decode(data.encode("ascii")) + raise NotImplementedError(f"Invalid zarr format: {zarr_format}. Expected 2.") + + +def float_from_json_v2(data: JSONFloat) -> float: + """ + Convert a JSON float to a float (Zarr v2). + + Parameters + ---------- + data : JSONFloat + The JSON float to convert. + + Returns + ------- + float + The float value. + """ + match data: + case "NaN": + return float("nan") + case "Infinity": + return float("inf") + case "-Infinity": + return float("-inf") + case _: + return float(data) + + +def float_from_json_v3(data: JSONFloat) -> float: + """ + Convert a JSON float to a float (v3). + + Parameters + ---------- + data : JSONFloat + The JSON float to convert. + + Returns + ------- + float + The float value. + """ + # todo: support the v3-specific NaN handling + return float_from_json_v2(data) + + +def float_from_json(data: JSONFloat, zarr_format: ZarrFormat) -> float: + """ + Convert a JSON float to a float based on zarr format. + + Parameters + ---------- + data : JSONFloat + The JSON float to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + float + The float value. 
+ """ + if zarr_format == 2: + return float_from_json_v2(data) + else: + return float_from_json_v3(data) + + +def complex_from_json_v2( + data: tuple[JSONFloat, JSONFloat], dtype: np.dtypes.Complex64DType | np.dtypes.Complex128DType +) -> np.complexfloating[Any, Any]: + """ + Convert a JSON complex float to a complex number (v2). + + Parameters + ---------- + data : tuple[JSONFloat, JSONFloat] + The JSON complex float to convert. + dtype : Any + The numpy dtype. + + Returns + ------- + np.complexfloating + The complex number. + """ + return dtype.type(complex(float_from_json_v2(data[0]), float_from_json_v2(data[1]))) + + +def complex_from_json_v3( + data: tuple[JSONFloat, JSONFloat], dtype: np.dtypes.Complex64DType | np.dtypes.Complex128DType +) -> np.complexfloating[Any, Any]: + """ + Convert a JSON complex float to a complex number (v3). + + Parameters + ---------- + data : tuple[JSONFloat, JSONFloat] + The JSON complex float to convert. + dtype : Any + The numpy dtype. + + Returns + ------- + np.complexfloating + The complex number. + """ + return dtype.type(complex(float_from_json_v3(data[0]), float_from_json_v3(data[1]))) + + +def complex_from_json( + data: tuple[JSONFloat, JSONFloat], dtype: Any, zarr_format: ZarrFormat +) -> np.complexfloating[Any, Any]: + """ + Convert a JSON complex float to a complex number based on zarr format. + + Parameters + ---------- + data : tuple[JSONFloat, JSONFloat] + The JSON complex float to convert. + dtype : Any + The numpy dtype. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + np.complexfloating + The complex number. + """ + if zarr_format == 2: + return complex_from_json_v2(data, dtype) + else: + if check_json_complex_float_v3(data): + return complex_from_json_v3(data, dtype) + else: + raise TypeError(f"Invalid type: {data}. Expected a sequence of two numbers.") + raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") + + +def datetime_to_json(data: np.datetime64) -> int: + """ + Convert a datetime64 to a JSON integer. + + Parameters + ---------- + data : np.datetime64 + The datetime64 value to convert. + + Returns + ------- + int + The JSON representation of the datetime64. + """ + return data.view(np.int64).item() + + +def datetime_from_json(data: int, unit: DateUnit | TimeUnit) -> np.datetime64: + """ + Convert a JSON integer to a datetime64. + + Parameters + ---------- + data : int + The JSON integer to convert. + unit : DateUnit or TimeUnit + The unit of the datetime64. + + Returns + ------- + np.datetime64 + The datetime64 value. + """ + return cast(np.datetime64, np.int64(data).view(f"datetime64[{unit}]")) diff --git a/src/zarr/core/dtype/registry.py b/src/zarr/core/dtype/registry.py new file mode 100644 index 0000000000..d4f1f03258 --- /dev/null +++ b/src/zarr/core/dtype/registry.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Self + +from zarr.core.dtype.common import DataTypeValidationError + +if TYPE_CHECKING: + from importlib.metadata import EntryPoint + + from zarr.core.common import JSON + from zarr.core.dtype.wrapper import DTypeWrapper, TDType + + +@dataclass(frozen=True, kw_only=True) +class DataTypeRegistry: + contents: dict[str, type[DTypeWrapper[Any, Any]]] = field(default_factory=dict, init=False) + lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) + + def lazy_load(self) -> None: + for e in self.lazy_load_list: + self.register(e.name, e.load()) + + self.lazy_load_list.clear() + + def register(self: Self, key: str, cls: type[DTypeWrapper[Any, Any]]) -> None: + # don't register the same dtype twice + if key not in self.contents or self.contents[key] != cls: + self.contents[key] = cls + + def get(self, key: str) -> type[DTypeWrapper[Any, Any]]: + return self.contents[key] + + def match_dtype(self, dtype: TDType) -> 
DTypeWrapper[Any, Any]: + self.lazy_load() + for val in self.contents.values(): + try: + return val.from_dtype(dtype) + except DataTypeValidationError: + pass + raise ValueError(f"No data type wrapper found that matches dtype '{dtype}'") + + def match_json(self, data: JSON) -> DTypeWrapper[Any, Any]: + self.lazy_load() + for val in self.contents.values(): + try: + return val.from_dict(data) + except DataTypeValidationError: + pass + raise ValueError(f"No data type wrapper found that matches {data}") diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py new file mode 100644 index 0000000000..002bd100e9 --- /dev/null +++ b/src/zarr/core/dtype/wrapper.py @@ -0,0 +1,279 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, ClassVar, Generic, Self, TypeGuard, TypeVar, cast + +import numpy as np + +from zarr.abc.metadata import Metadata +from zarr.core.dtype.common import DataTypeValidationError + +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + +TScalar = TypeVar("TScalar", bound=np.generic | str) +# TODO: figure out an interface or protocol that non-numpy dtypes can use +TDType = TypeVar("TDType", bound=np.dtype[Any]) + + +@dataclass(frozen=True, kw_only=True) +class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): + """ + Abstract base class for wrapping numpy dtypes. + + Attributes + ---------- + dtype_cls : ClassVar[type[TDType]] + The numpy dtype class. This is a class variable. Instances of this class cannot set it. + _zarr_v3_name : ClassVar[str] + The name given to the wrapped data type by a zarr v3 data type specification. Note that this + is not necessarily the same name that will appear in metadata documents, as some data types + have names that depend on their configuration. 
+ """ + + # this class will create a numpy dtype + # mypy currently disallows class variables to contain type parameters + # but it seems like it should be OK for us to use it here: + # https://github.com/python/typing/discussions/1424#discussioncomment-7989934 + dtype_cls: ClassVar[type[TDType]] # type: ignore[misc] + _zarr_v3_name: ClassVar[str] + + @classmethod + @abstractmethod + def _from_dtype_unsafe(cls: type[Self], dtype: TDType) -> Self: + """ + Wrap a native dtype without checking. + + Parameters + ---------- + dtype : TDType + The native dtype to wrap. + + Returns + ------- + Self + The wrapped dtype. + """ + raise NotImplementedError + + @classmethod + def from_dtype(cls: type[Self], dtype: TDType) -> Self: + """ + Wrap a dtype object. + + Parameters + ---------- + dtype : TDType + The dtype object to wrap. + + Returns + ------- + Self + The wrapped dtype. + + Raises + ------ + TypeError + If the dtype does not match the dtype_cls class attribute. + """ + if cls.check_dtype(dtype): + return cls._from_dtype_unsafe(dtype) + raise DataTypeValidationError( + f"Invalid dtype: {dtype}. Expected an instance of {cls.dtype_cls}." + ) + + @abstractmethod + def to_dtype(self: Self) -> TDType: + """ + Return an instance of the wrapped dtype. + + Returns + ------- + TDType + The unwrapped dtype. + """ + raise NotImplementedError + + @abstractmethod + def to_dict(self) -> dict[str, JSON]: + """ + Convert the wrapped data type to a dictionary. + + Returns + ------- + dict[str, JSON] + The dictionary representation of the wrapped data type + """ + raise NotImplementedError + + def cast_value(self: Self, value: object) -> TScalar: + """ + Cast a value to an instance of the scalar type. + This implementation assumes a numpy-style dtype class that has a + ``type`` method for casting scalars. Non-numpy dtypes will need to + override this method. + + Parameters + ---------- + value : object + The value to cast. + + Returns + ------- + TScalar + The cast value. 
+ """ + return cast(TScalar, self.to_dtype().type(value)) + + @abstractmethod + def default_value(self) -> TScalar: + """ + Get the default value for the wrapped data type. This is a method, rather than an attribute, + because the default value for some data types may depend on parameters that are not known + until a concrete data type is wrapped. + + Returns + ------- + TScalar + The default value for this data type. + """ + ... + + @classmethod + def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[TDType]: + """ + Check that a data type matches the dtype_cls class attribute. Used as a type guard. + + Parameters + ---------- + dtype : TDType + The dtype to check. + + Returns + ------- + Bool + True if the dtype matches, False otherwise. + """ + return type(dtype) is cls.dtype_cls + + @classmethod + def check_json(cls: type[Self], data: dict[str, JSON]) -> TypeGuard[dict[str, JSON]]: + """ + Check that a JSON representation of a data type matches the dtype_cls class attribute. Used + as a type guard. This base implementation checks that the input is a dictionary, + that the key "name" is in that dictionary, and that the value of "name" + matches the _zarr_v3_name class attribute. + + Parameters + ---------- + data : JSON + The JSON representation of the data type. + + Returns + ------- + Bool + True if the JSON representation matches, False otherwise. + """ + return "name" in data and data["name"] == cls._zarr_v3_name + + @classmethod + def from_dict(cls: type[Self], data: dict[str, JSON]) -> Self: + """ + Wrap a JSON representation of a data type. + + Parameters + ---------- + data : dict[str, JSON] + The JSON representation of the data type. + + Returns + ------- + Self + The wrapped data type. 
+ """ + if cls.check_json(data): + return cls._from_json_unsafe(data) + raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") + + @classmethod + def _from_json_unsafe(cls: type[Self], data: dict[str, JSON]) -> Self: + """ + Wrap a JSON representation of a data type. + + Parameters + ---------- + data : dict[str, JSON] + The JSON representation of the data type. + + Returns + ------- + Self + The wrapped data type. + """ + config = data.get("configuration", {}) + return cls(**config) + + def get_name(self, zarr_format: ZarrFormat) -> str: + """ + Return the name of the wrapped data type. + + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + str + The name of the wrapped data type. + + Notes + ----- + This is a method, rather than an attribute, because the name of the data type may depend on + parameters that are not known until a concrete data type is wrapped. + + As the names of data types vary between zarr versions, this method takes a ``zarr_format`` + parameter + """ + if zarr_format == 2: + return self.to_dtype().str + return self._zarr_v3_name + + @abstractmethod + def to_json_value(self, data: TScalar, *, zarr_format: ZarrFormat) -> JSON: + """ + Convert a single value to JSON-serializable format. + + Parameters + ---------- + data : object + The value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + JSON + The JSON-serializable format. + """ + raise NotImplementedError + + @abstractmethod + def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: + """ + Read a JSON-serializable value as a scalar. + + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + TScalar + The numpy scalar. 
+ """ + raise NotImplementedError diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py index 33aa22b398..e69de29bb2 100644 --- a/src/zarr/core/metadata/dtype.py +++ b/src/zarr/core/metadata/dtype.py @@ -1,808 +0,0 @@ -from __future__ import annotations - -import base64 -from abc import ABC, abstractmethod -from collections.abc import Sequence -from dataclasses import dataclass, field, replace -from importlib.metadata import EntryPoint -from typing import ( - TYPE_CHECKING, - Any, - ClassVar, - Generic, - Literal, - Self, - TypeGuard, - TypeVar, - cast, - get_args, -) - -import numpy as np -import numpy.typing as npt - -from zarr.abc.metadata import Metadata -from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING - -if TYPE_CHECKING: - from zarr.core.common import JSON, ZarrFormat - -Endianness = Literal["little", "big", "native"] -DataTypeFlavor = Literal["boolean", "numeric", "string", "bytes"] -JSONFloat = float | Literal["NaN", "Infinity", "-Infinity"] - - -def endianness_to_numpy_str(endianness: Endianness | None) -> Literal[">", "<", "=", "|"]: - match endianness: - case "little": - return "<" - case "big": - return ">" - case "native": - return "=" - case None: - return "|" - raise ValueError( - f"Invalid endianness: {endianness}. Expected one of {get_args(endianness)} or None" - ) - - -def check_json_bool(data: JSON) -> TypeGuard[bool]: - """ - Check if a JSON value represents a boolean. - """ - return bool(isinstance(data, bool)) - - -def check_json_str(data: JSON) -> TypeGuard[str]: - """ - Check if a JSON value represents a string. - """ - return bool(isinstance(data, str)) - - -def check_json_int(data: JSON) -> TypeGuard[int]: - """ - Check if a JSON value represents an integer. 
- """ - return bool(isinstance(data, int)) - - -def check_json_float_v2(data: JSON) -> TypeGuard[float]: - if data == "NaN" or data == "Infinity" or data == "-Infinity": - return True - else: - return bool(isinstance(data, float | int)) - - -def check_json_float_v3(data: JSON) -> TypeGuard[float]: - # TODO: handle the special JSON serialization of different NaN values - return check_json_float_v2(data) - - -def check_json_float(data: JSON, zarr_format: ZarrFormat) -> TypeGuard[float]: - if zarr_format == 2: - return check_json_float_v2(data) - else: - return check_json_float_v3(data) - - -def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: - """ - Check if a JSON value represents a complex float, as per the zarr v3 spec - """ - return ( - not isinstance(data, str) - and isinstance(data, Sequence) - and len(data) == 2 - and check_json_float_v3(data[0]) - and check_json_float_v3(data[1]) - ) - - -def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: - """ - Check if a JSON value represents a complex float, as per the behavior of zarr-python 2.x - """ - return ( - not isinstance(data, str) - and isinstance(data, Sequence) - and len(data) == 2 - and check_json_float_v2(data[0]) - and check_json_float_v2(data[1]) - ) - - -def check_json_complex_float( - data: JSON, zarr_format: ZarrFormat -) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: - if zarr_format == 2: - return check_json_complex_float_v2(data) - else: - return check_json_complex_float_v3(data) - - -def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: - if np.isnan(data): - return "NaN" - elif np.isinf(data): - return "Infinity" if data > 0 else "-Infinity" - return float(data) - - -def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: - # v3 can in principle handle distinct NaN values, but numpy does not represent these explicitly - # so we just reuse the v2 routine here - return float_to_json_v2(data) - - -def 
float_to_json(data: float | np.floating[Any], zarr_format: ZarrFormat) -> JSONFloat: - """ - convert a float to JSON as per the zarr v3 spec - """ - if zarr_format == 2: - return float_to_json_v2(data) - else: - return float_to_json_v3(data) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") - - -def complex_to_json_v2(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: - return float_to_json_v2(data.real), float_to_json_v2(data.imag) - - -def complex_to_json_v3(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: - return float_to_json_v3(data.real), float_to_json_v3(data.imag) - - -def complex_to_json( - data: complex | np.complexfloating[Any], zarr_format: ZarrFormat -) -> tuple[JSONFloat, JSONFloat] | JSONFloat: - if zarr_format == 2: - return complex_to_json_v2(data) - else: - return complex_to_json_v3(data) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") - - -def structured_scalar_to_json(data: bytes, zarr_format: ZarrFormat) -> str: - if zarr_format == 2: - return base64.b64encode(data).decode("ascii") - raise NotImplementedError(f"Invalid zarr format: {zarr_format}. Expected 2.") - - -def structured_scalar_from_json(data: str, zarr_format: ZarrFormat) -> bytes: - if zarr_format == 2: - return base64.b64decode(data.encode("ascii")) - raise NotImplementedError(f"Invalid zarr format: {zarr_format}. 
Expected 2.") - - -def float_from_json_v2(data: JSONFloat) -> float: - match data: - case "NaN": - return float("nan") - case "Infinity": - return float("inf") - case "-Infinity": - return float("-inf") - case _: - return float(data) - - -def float_from_json_v3(data: JSONFloat) -> float: - # todo: support the v3-specific NaN handling - return float_from_json_v2(data) - - -def float_from_json(data: JSONFloat, zarr_format: ZarrFormat) -> float: - if zarr_format == 2: - return float_from_json_v2(data) - else: - return float_from_json_v3(data) - - -def complex_from_json_v2(data: JSONFloat, dtype: Any) -> np.complexfloating[Any, Any]: - return dtype.type(complex(*data)) - - -def complex_from_json_v3( - data: tuple[JSONFloat, JSONFloat], dtype: Any -) -> np.complexfloating[Any, Any]: - return dtype.type(complex(*data)) - - -def complex_from_json( - data: tuple[JSONFloat, JSONFloat], dtype: Any, zarr_format: ZarrFormat -) -> np.complexfloating: - if zarr_format == 2: - return complex_from_json_v2(data, dtype) - else: - if check_json_complex_float_v3(data): - return complex_from_json_v3(data, dtype) - else: - raise TypeError(f"Invalid type: {data}. Expected a sequence of two numbers.") - raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") - - -def datetime_to_json(data: np.datetime64[Any]) -> int: - return data.view(np.int64).item() - - -def datetime_from_json(data: int, unit: DateUnit | TimeUnit) -> np.datetime64[Any]: - return np.int64(data).view(f"datetime64[{unit}]") - - -TScalar = TypeVar("TScalar", bound=np.generic | str, covariant=True) -# TODO: figure out an interface or protocol that non-numpy dtypes can -TDType = TypeVar("TDType", bound=np.dtype[Any]) - - -@dataclass(frozen=True, kw_only=True) -class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): - name: ClassVar[str] - dtype_cls: ClassVar[type[TDType]] # this class will create a numpy dtype - endianness: Endianness | None = "native" - - def to_dict(self) -> dict[str, JSON]: - return {"name": self.name} - - def cast_value(self: Self, value: object) -> TScalar: - return cast(TScalar, self.unwrap().type(value)) - - @abstractmethod - def default_value(self) -> TScalar: ... - - @classmethod - def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[TDType]: - """ - Check that a dtype matches the dtype_cls class attribute - """ - return type(dtype) is cls.dtype_cls - - @classmethod - def wrap(cls: type[Self], dtype: TDType) -> Self: - if cls.check_dtype(dtype): - return cls._wrap_unsafe(dtype) - raise TypeError(f"Invalid dtype: {dtype}. Expected an instance of {cls.dtype_cls}.") - - @classmethod - @abstractmethod - def _wrap_unsafe(cls: type[Self], dtype: TDType) -> Self: - raise NotImplementedError - - def unwrap(self: Self) -> TDType: - endian_str = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(endian_str) - - def with_endianness(self: Self, endianness: Endianness) -> Self: - return replace(self, endianness=endianness) - - @abstractmethod - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> JSON: - """ - Convert a single value to JSON-serializable format. Depends on the zarr format. 
- """ - raise NotImplementedError - - @abstractmethod - def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: - """ - Read a JSON-serializable value as a numpy scalar - """ - raise NotImplementedError - - -@dataclass(frozen=True, kw_only=True) -class Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): - name = "bool" - dtype_cls: ClassVar[type[np.dtypes.BoolDType]] = np.dtypes.BoolDType - - def default_value(self) -> np.bool_: - return np.False_ - - @classmethod - def _wrap_unsafe(cls, dtype: np.dtypes.BoolDType) -> Self: - return cls() - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> bool: - return bool(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: - if check_json_bool(data): - return self.unwrap().type(data) - raise TypeError(f"Invalid type: {data}. Expected a boolean.") - - -class IntWrapperBase(DTypeWrapper[TDType, TScalar]): - def default_value(self) -> TScalar: - return self.unwrap().type(0) - - @classmethod - def _wrap_unsafe(cls, dtype: TDType) -> Self: - return cls() - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: - if check_json_int(data): - return self.unwrap().type(data) - raise TypeError(f"Invalid type: {data}. 
Expected an integer.") - - -@dataclass(frozen=True, kw_only=True) -class Int8(IntWrapperBase[np.dtypes.Int8DType, np.int8]): - dtype_cls = np.dtypes.Int8DType - name = "int8" - - -@dataclass(frozen=True, kw_only=True) -class UInt8(IntWrapperBase[np.dtypes.UInt8DType, np.uint8]): - dtype_cls = np.dtypes.UInt8DType - name = "uint8" - - -@dataclass(frozen=True, kw_only=True) -class Int16(IntWrapperBase[np.dtypes.Int16DType, np.int16]): - dtype_cls = np.dtypes.Int16DType - name = "int16" - - -@dataclass(frozen=True, kw_only=True) -class UInt16(IntWrapperBase[np.dtypes.UInt16DType, np.uint16]): - dtype_cls = np.dtypes.UInt16DType - name = "uint16" - - -@dataclass(frozen=True, kw_only=True) -class Int32(IntWrapperBase[np.dtypes.Int32DType, np.int32]): - dtype_cls = np.dtypes.Int32DType - name = "int32" - - -@dataclass(frozen=True, kw_only=True) -class UInt32(IntWrapperBase[np.dtypes.UInt32DType, np.uint32]): - dtype_cls = np.dtypes.UInt32DType - name = "uint32" - - -@dataclass(frozen=True, kw_only=True) -class Int64(IntWrapperBase[np.dtypes.Int64DType, np.int64]): - dtype_cls = np.dtypes.Int64DType - name = "int64" - - -@dataclass(frozen=True, kw_only=True) -class UInt64(IntWrapperBase[np.dtypes.UInt64DType, np.uint64]): - dtype_cls = np.dtypes.UInt64DType - name = "uint64" - - -class FloatWrapperBase(DTypeWrapper[TDType, TScalar]): - def default_value(self) -> TScalar: - return self.unwrap().type(0.0) - - @classmethod - def _wrap_unsafe(cls, dtype: TDType) -> Self: - return cls() - - def to_json_value(self, data: np.generic, zarr_format: ZarrFormat) -> JSONFloat: - return float_to_json(data, zarr_format) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: - if check_json_float_v2(data): - return self.unwrap().type(float_from_json(data, zarr_format)) - raise TypeError(f"Invalid type: {data}. 
Expected a float.") - - -@dataclass(frozen=True, kw_only=True) -class Float16(FloatWrapperBase[np.dtypes.Float16DType, np.float16]): - dtype_cls = np.dtypes.Float16DType - name = "float16" - - -@dataclass(frozen=True, kw_only=True) -class Float32(FloatWrapperBase[np.dtypes.Float32DType, np.float32]): - dtype_cls = np.dtypes.Float32DType - name = "float32" - - -@dataclass(frozen=True, kw_only=True) -class Float64(FloatWrapperBase[np.dtypes.Float64DType, np.float64]): - dtype_cls = np.dtypes.Float64DType - name = "float64" - - -@dataclass(frozen=True, kw_only=True) -class Complex64(DTypeWrapper[np.dtypes.Complex64DType, np.complex64]): - dtype_cls = np.dtypes.Complex64DType - name = "complex64" - - def default_value(self) -> np.complex64: - return np.complex64(0.0) - - @classmethod - def _wrap_unsafe(cls, dtype: np.dtypes.Complex64DType) -> Self: - return cls() - - def to_json_value( - self, data: np.generic, zarr_format: ZarrFormat - ) -> tuple[JSONFloat, JSONFloat]: - return complex_to_json(data, zarr_format) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex64: - if check_json_complex_float_v3(data): - return complex_from_json(data, dtype=self.unwrap(), zarr_format=zarr_format) - raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") - - -@dataclass(frozen=True, kw_only=True) -class Complex128(DTypeWrapper[np.dtypes.Complex128DType, np.complex128]): - dtype_cls = np.dtypes.Complex128DType - name = "complex128" - - def default_value(self) -> np.complex128: - return np.complex128(0.0) - - @classmethod - def _wrap_unsafe(cls, dtype: np.dtypes.Complex128DType) -> Self: - return cls() - - def to_json_value( - self, data: np.generic, zarr_format: ZarrFormat - ) -> tuple[JSONFloat, JSONFloat]: - return complex_to_json(data, zarr_format) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex128: - if check_json_complex_float_v3(data): - return complex_from_json(data, dtype=self.unwrap(), zarr_format=zarr_format) - raise TypeError(f"Invalid type: {data}. Expected a complex float.") - - -@dataclass(frozen=True, kw_only=True) -class FlexibleWrapperBase(DTypeWrapper[TDType, TScalar]): - item_size_bits: ClassVar[int] - length: int = 0 - - @classmethod - def _wrap_unsafe(cls, dtype: TDType) -> Self: - return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) - - def unwrap(self) -> TDType: - endianness_code = endianness_to_numpy_str(self.endianness) - return self.dtype_cls(self.length).newbyteorder(endianness_code) - - -@dataclass(frozen=True, kw_only=True) -class FixedLengthAsciiString(FlexibleWrapperBase[np.dtypes.BytesDType, np.bytes_]): - dtype_cls = np.dtypes.BytesDType - name = "numpy.static_byte_string" - item_size_bits = 8 - - def default_value(self) -> np.bytes_: - return np.bytes_(b"") - - def to_dict(self) -> dict[str, JSON]: - return {"name": self.name, "configuration": {"length": self.length}} - - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(data).decode("ascii") - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: - if check_json_str(data): - return self.unwrap().type(base64.standard_b64decode(data.encode("ascii"))) - raise 
TypeError(f"Invalid type: {data}. Expected a string.") - - -@dataclass(frozen=True, kw_only=True) -class StaticRawBytes(FlexibleWrapperBase[np.dtypes.VoidDType, np.void]): - dtype_cls = np.dtypes.VoidDType - name = "r*" - item_size_bits = 8 - - def default_value(self) -> np.void: - return self.cast_value(("\x00" * self.length).encode("ascii")) - - def to_dict(self) -> dict[str, JSON]: - return {"name": f"r{self.length * self.item_size_bits}"} - - @classmethod - def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[TDType]: - """ - Reject structured dtypes by ensuring that dtype.fields is None - """ - return type(dtype) is cls.dtype_cls and dtype.fields is None - - def unwrap(self) -> np.dtypes.VoidDType: - # this needs to be overridden because numpy does not allow creating a void type - # by invoking np.dtypes.VoidDType directly - endianness_code = endianness_to_numpy_str(self.endianness) - return np.dtype(f"{endianness_code}V{self.length}") - - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(data).decode("ascii") - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: - # todo: check that this is well-formed - return self.unwrap().type(base64.standard_b64decode(data)) - - -@dataclass(frozen=True, kw_only=True) -class FixedLengthUnicodeString(FlexibleWrapperBase[np.dtypes.StrDType, np.str_]): - dtype_cls = np.dtypes.StrDType - name = "numpy.static_unicode_string" - item_size_bits = 32 # UCS4 is 32 bits per code point - - def default_value(self) -> np.str_: - return np.str_("") - - def to_dict(self) -> dict[str, JSON]: - return {"name": self.name, "configuration": {"length": self.length}} - - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: - return str(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. 
Expected a string.") - return self.unwrap().type(data) - - -if _NUMPY_SUPPORTS_VLEN_STRING: - - @dataclass(frozen=True, kw_only=True) - class VariableLengthString(DTypeWrapper[np.dtypes.StringDType, str]): - dtype_cls = np.dtypes.StringDType - name = "numpy.vlen_string" - - def default_value(self) -> str: - return "" - - @classmethod - def _wrap_unsafe(cls, dtype: np.dtypes.StringDType) -> Self: - return cls() - - def to_dict(self) -> dict[str, JSON]: - return {"name": self.name} - - def unwrap(self) -> np.dtypes.StringDType: - # StringDType does not have endianness, so we ignore it here - return self.dtype_cls() - - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: - return str(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. Expected a string.") - return self.unwrap().type(data) - -else: - - @dataclass(frozen=True, kw_only=True) - class VariableLengthString(DTypeWrapper[np.dtypes.ObjectDType, str]): - dtype_cls = np.dtypes.ObjectDType - name = "numpy.vlen_string" - endianness: Endianness = field(default=None) - - def default_value(self) -> str: - return "" - - def __post_init__(self) -> None: - if self.endianness is not None: - raise ValueError("VariableLengthString does not support endianness.") - - def to_dict(self) -> dict[str, JSON]: - return {"name": self.name} - - @classmethod - def _wrap_unsafe(cls, dtype: np.dtypes.ObjectDType) -> Self: - return cls() - - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: - return str(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: - """ - String literals pass through - """ - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. 
Expected a string.") - return data - - -DateUnit = Literal["Y", "M", "W", "D"] -TimeUnit = Literal["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] - - -@dataclass(frozen=True, kw_only=True) -class DateTime64(DTypeWrapper[np.dtypes.DateTime64DType, np.datetime64]): - dtype_cls = np.dtypes.DateTime64DType - name = "numpy/datetime64" - unit: DateUnit | TimeUnit = "s" - - def default_value(self) -> np.datetime64: - return np.datetime64("NaT") - - @classmethod - def _wrap_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: - unit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] - return cls(unit=unit) - - def cast_value(self, value: object) -> np.datetime64: - return self.unwrap().type(value, self.unit) - - def unwrap(self) -> np.dtypes.DateTime64DType: - return np.dtype(f"datetime64[{self.unit}]").newbyteorder( - endianness_to_numpy_str(self.endianness) - ) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: - if check_json_int(data): - return datetime_from_json(data, self.unit) - raise TypeError(f"Invalid type: {data}. Expected an integer.") - - def to_json_value(self, data: np.datetime64, *, zarr_format: ZarrFormat) -> int: - return datetime_to_json(data) - - -@dataclass(frozen=True, kw_only=True) -class Structured(DTypeWrapper[np.dtypes.VoidDType, np.void]): - dtype_cls = np.dtypes.VoidDType - name = "numpy/struct" - fields: tuple[tuple[str, DTypeWrapper[Any, Any], int], ...] 
- - def default_value(self) -> np.void: - return np.array([0], dtype=self.unwrap())[0] - - @classmethod - def check_dtype(cls, dtype: np.dtypes.DTypeLike) -> TypeGuard[np.dtypes.VoidDType]: - """ - Check that this dtype is a numpy structured dtype - """ - return super().check_dtype(dtype) and dtype.fields is not None - - @classmethod - def _wrap_unsafe(cls, dtype: np.dtypes.VoidDType) -> Self: - fields: list[tuple[str, DTypeWrapper[Any, Any], int]] = [] - - if dtype.fields is None: - raise ValueError("numpy dtype has no fields") - - for key, (dtype_instance, offset) in dtype.fields.items(): - dtype_wrapped = data_type_registry.match_dtype(dtype_instance) - fields.append((key, dtype_wrapped, offset)) - - return cls(fields=tuple(fields)) - - def to_dict(self) -> dict[str, JSON]: - base_dict = super().to_dict() - if base_dict.get("configuration", {}) != {}: - raise ValueError( - "This data type wrapper cannot inherit from a data type wrapper that defines a configuration for its dict serialization" - ) - field_configs = [ - (f_name, f_dtype.to_dict(), f_offset) for f_name, f_dtype, f_offset in self.fields - ] - base_dict["configuration"] = {"fields": field_configs} - return base_dict - - @classmethod - def from_dict(cls, data: dict[str, JSON]) -> Self: - fields = tuple( - (f_name, get_data_type_from_dict(f_dtype), f_offset) - for f_name, f_dtype, f_offset in data["fields"] - ) - return cls(fields=fields) - - def unwrap(self) -> np.dtypes.VoidDType: - return np.dtype([(key, dtype.unwrap()) for (key, dtype, _) in self.fields]) - - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: - return structured_scalar_to_json(data.tobytes(), zarr_format) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. 
Expected a string.") - as_bytes = structured_scalar_from_json(data, zarr_format=zarr_format) - dtype = self.unwrap() - return np.array([as_bytes], dtype=dtype.str).view(dtype)[0] - - -def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper[Any, Any]: - if dtype in (str, "str"): - if _NUMPY_SUPPORTS_VLEN_STRING: - np_dtype = np.dtype("T") - else: - np_dtype = np.dtype("O") - else: - np_dtype = np.dtype(dtype) - data_type_registry.lazy_load() - return data_type_registry.match_dtype(np_dtype) - - -def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeWrapper[Any.Any]: - data_type_registry.lazy_load() - dtype_name = dtype["name"] - dtype_cls = data_type_registry.get(dtype_name) - if dtype_cls is None: - raise ValueError(f"No data type class matching name {dtype_name}") - return dtype_cls.from_dict(dtype.get("configuration", {})) - - -def resolve_dtype( - dtype: npt.DTypeLike | DTypeWrapper[Any, Any] | dict[str, JSON], -) -> DTypeWrapper[Any, Any]: - if isinstance(dtype, DTypeWrapper): - return dtype - elif isinstance(dtype, dict): - return get_data_type_from_dict(dtype) - else: - return get_data_type_from_numpy(dtype) - - -def get_data_type_by_name( - dtype: str, configuration: dict[str, JSON] | None = None -) -> DTypeWrapper[Any, Any]: - data_type_registry.lazy_load() - if configuration is None: - _configuration = {} - else: - _configuration = configuration - maybe_dtype_cls = data_type_registry.get(dtype) - if maybe_dtype_cls is None: - raise ValueError(f"No data type class matching name {dtype}") - return maybe_dtype_cls.from_dict(_configuration) - - -@dataclass(frozen=True, kw_only=True) -class DataTypeRegistry: - contents: dict[str, type[DTypeWrapper[Any, Any]]] = field(default_factory=dict, init=False) - lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) - - def lazy_load(self) -> None: - for e in self.lazy_load_list: - self.register(e.load()) - - self.lazy_load_list.clear() - - def register(self: Self, cls: 
type[DTypeWrapper[Any, Any]]) -> None: - # don't register the same dtype twice - if cls.name not in self.contents or self.contents[cls.name] != cls: - self.contents[cls.name] = cls - - def get(self, key: str) -> type[DTypeWrapper[Any, Any]]: - return self.contents[key] - - def match_dtype(self, dtype: TDType) -> DTypeWrapper[Any, Any]: - self.lazy_load() - for val in self.contents.values(): - try: - return val.wrap(dtype) - except TypeError: - pass - raise ValueError(f"No data type wrapper found that matches dtype '{dtype}'") - - -def register_data_type(cls: type[DTypeWrapper[Any, Any]]) -> None: - data_type_registry.register(cls) - - -data_type_registry = DataTypeRegistry() - -INTEGER_DTYPE = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 -FLOAT_DTYPE = Float16 | Float32 | Float64 -COMPLEX_DTYPE = Complex64 | Complex128 -STRING_DTYPE = FixedLengthUnicodeString | VariableLengthString | FixedLengthAsciiString -DTYPE = ( - Bool - | INTEGER_DTYPE - | FLOAT_DTYPE - | COMPLEX_DTYPE - | STRING_DTYPE - | StaticRawBytes - | Structured - | DateTime64 -) -for dtype in get_args(DTYPE): - register_data_type(dtype) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 3883a998c1..94c69602af 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -8,11 +8,8 @@ import numcodecs.abc from zarr.abc.metadata import Metadata -from zarr.core.metadata.dtype import ( - DTypeWrapper, - Structured, - get_data_type_from_numpy, -) +from zarr.core.dtype import get_data_type_from_numpy +from zarr.core.dtype.wrapper import DTypeWrapper if TYPE_CHECKING: from typing import Any, Literal, Self @@ -82,7 +79,7 @@ def __init__( order_parsed = parse_indexing_order(order) dimension_separator_parsed = parse_separator(dimension_separator) filters_parsed = parse_filters(filters) - fill_value_parsed = parse_fill_value(fill_value, dtype=dtype.unwrap()) + fill_value_parsed = parse_fill_value(fill_value, dtype=dtype.to_dtype()) 
attributes_parsed = parse_attributes(attributes) object.__setattr__(self, "shape", shape_parsed) @@ -125,9 +122,9 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: _data = data.copy() # check that the zarr_format attribute is correct _ = parse_zarr_format(_data.pop("zarr_format")) - dtype = get_data_type_from_numpy(parse_dtype(_data["dtype"])) + dtype = get_data_type_from_numpy(_data["dtype"]) _data["dtype"] = dtype - if dtype.unwrap().kind in "SV": + if dtype.to_dtype().kind in "SV": fill_value_encoded = _data.get("fill_value") if fill_value_encoded is not None: fill_value = base64.standard_b64decode(fill_value_encoded) @@ -181,13 +178,7 @@ def to_dict(self) -> dict[str, JSON]: fill_value = self.dtype.to_json_value(self.fill_value, zarr_format=2) zarray_dict["fill_value"] = fill_value - _ = zarray_dict.pop("dtype") - dtype_json: JSON - if isinstance(self.dtype, Structured): - dtype_json = tuple(self.dtype.unwrap().descr) - else: - dtype_json = self.dtype.unwrap().str - zarray_dict["dtype"] = dtype_json + zarray_dict["dtype"] = self.dtype.get_name(zarr_format=2) return zarray_dict diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index e285490bfd..2c6e65037e 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -4,10 +4,9 @@ from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.metadata.dtype import ( +from zarr.core.dtype import ( DTypeWrapper, VariableLengthString, - get_data_type_by_name, get_data_type_from_dict, ) @@ -96,7 +95,7 @@ def validate_array_bytes_codec(codecs: tuple[Codec, ...]) -> ArrayBytesCodec: return abcs[0] -def validate_codecs(codecs: tuple[Codec, ...], dtype: DTypeWrapper) -> None: +def validate_codecs(codecs: tuple[Codec, ...], dtype: DTypeWrapper[Any, Any]) -> None: """Check that the codecs are valid for the given dtype""" from zarr.codecs.sharding import ShardingCodec @@ -235,7 +234,7 @@ class 
ArrayV3MetadataDict(TypedDict): @dataclass(frozen=True, kw_only=True) class ArrayV3Metadata(Metadata): shape: ChunkCoords - data_type: DTypeWrapper + data_type: DTypeWrapper[Any, Any] chunk_grid: ChunkGrid chunk_key_encoding: ChunkKeyEncoding fill_value: Any @@ -250,7 +249,7 @@ def __init__( self, *, shape: Iterable[int], - data_type: DTypeWrapper, + data_type: DTypeWrapper[Any, Any], chunk_grid: dict[str, JSON] | ChunkGrid, chunk_key_encoding: ChunkKeyEncodingLike, fill_value: object, @@ -270,14 +269,14 @@ def __init__( chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) - fill_value_parsed = data_type.unwrap().type(fill_value) + fill_value_parsed = data_type.to_dtype().type(fill_value) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) storage_transformers_parsed = parse_storage_transformers(storage_transformers) array_spec = ArraySpec( shape=shape_parsed, - dtype=data_type.unwrap(), + dtype=data_type.to_dtype(), fill_value=fill_value_parsed, config=ArrayConfig.from_dict({}), # TODO: config is not needed here. prototype=default_buffer_prototype(), # TODO: prototype is not needed here. 
@@ -312,7 +311,7 @@ def _validate_metadata(self) -> None: raise ValueError("`fill_value` is required.") for codec in self.codecs: codec.validate( - shape=self.shape, dtype=self.data_type.unwrap(), chunk_grid=self.chunk_grid + shape=self.shape, dtype=self.data_type.to_dtype(), chunk_grid=self.chunk_grid ) @property @@ -382,9 +381,7 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: data_type_json = _data.pop("data_type") if isinstance(data_type_json, str): - # check that the data_type attribute is valid - data_type = get_data_type_by_name(data_type_json) - + data_type = get_data_type_from_dict({"name": data_type_json}) else: data_type = get_data_type_from_dict(data_type_json) diff --git a/src/zarr/registry.py b/src/zarr/registry.py index 8830cdb1a9..d1fe1d181c 100644 --- a/src/zarr/registry.py +++ b/src/zarr/registry.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, Generic, TypeVar from zarr.core.config import BadConfigError, config -from zarr.core.metadata.dtype import data_type_registry +from zarr.core.dtype import data_type_registry if TYPE_CHECKING: from importlib.metadata import EntryPoint diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index aa42329be7..2eef703448 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -15,6 +15,7 @@ from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding from zarr.core.common import ZarrFormat +from zarr.core.dtype import parse_data_type from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata from zarr.core.sync import sync from zarr.storage import MemoryStore, StoreLike @@ -133,8 +134,9 @@ def array_metadata( shape = draw(array_shapes()) ndim = len(shape) chunk_shape = draw(array_shapes(min_dims=ndim, max_dims=ndim)) - dtype = draw(v3_dtypes()) - fill_value = draw(npst.from_dtype(dtype)) + np_dtype = draw(v3_dtypes()) + dtype = parse_data_type(np_dtype) + fill_value = 
draw(npst.from_dtype(np_dtype)) if zarr_format == 2: return ArrayV2Metadata( shape=shape, diff --git a/tests/conftest.py b/tests/conftest.py index 6ff1c4596f..5e17c82a37 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,7 +20,7 @@ from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition from zarr.core.common import JSON, parse_shapelike from zarr.core.config import config as zarr_config -from zarr.core.metadata.dtype import get_data_type_from_numpy +from zarr.core.dtype import get_data_type_from_numpy from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync @@ -263,7 +263,7 @@ def create_array_metadata( array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, - item_size=dtype_parsed.unwrap().itemsize, + item_size=dtype_parsed.to_dtype().itemsize, ) if order is None: diff --git a/tests/test_array.py b/tests/test_array.py index 5c58b3d3be..f8880c86c0 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -39,10 +39,9 @@ from zarr.core.buffer.cpu import NDBuffer from zarr.core.chunk_grids import _auto_partition from zarr.core.common import JSON, MemoryOrder, ZarrFormat +from zarr.core.dtype import get_data_type_from_numpy from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv -from zarr.core.metadata.dtype import get_data_type_from_numpy -from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.storage import LocalStore, MemoryStore, StorePath @@ -50,6 +49,7 @@ if TYPE_CHECKING: from zarr.core.array_spec import ArrayConfigLike from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @@ -1004,7 +1004,7 @@ async def test_v3_chunk_encoding( filters=filters, compressors=compressors, serializer="auto", 
- dtype=arr.metadata.data_type, + dtype=arr.metadata.data_type, # type: ignore[union-attr] ) assert arr.filters == filters_expected assert arr.compressors == compressors_expected @@ -1119,7 +1119,7 @@ async def test_with_data(impl: Literal["sync", "async"], store: Store) -> None: elif impl == "async": arr = await create_array(store, name=name, data=data, zarr_format=3) stored = await arr._get_selection( - BasicIndexer(..., shape=arr.shape, chunk_grid=arr.metadata.chunk_grid), + BasicIndexer(..., shape=arr.shape, chunk_grid=arr.chunk_grid), prototype=default_buffer_prototype(), ) else: diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py index a6c01153ff..ee3415a501 100644 --- a/tests/test_codecs/test_vlen.py +++ b/tests/test_codecs/test_vlen.py @@ -8,7 +8,7 @@ from zarr.abc.codec import Codec from zarr.abc.store import Store from zarr.codecs import ZstdCodec -from zarr.core.metadata.dtype import get_data_type_from_numpy +from zarr.core.dtype import get_data_type_from_numpy from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING from zarr.storage import StorePath diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index a81625b7eb..508519e696 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -18,9 +18,9 @@ open_consolidated, ) from zarr.core.buffer import cpu, default_buffer_prototype +from zarr.core.dtype import parse_data_type from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.metadata import ArrayV3Metadata -from zarr.core.metadata.dtype import get_data_type_from_numpy from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.storage import StorePath @@ -504,7 +504,7 @@ async def test_consolidated_metadata_backwards_compatibility( async def test_consolidated_metadata_v2(self): store = zarr.storage.MemoryStore() g = await AsyncGroup.from_store(store, 
attributes={"key": "root"}, zarr_format=2) - dtype = get_data_type_from_numpy("uint8") + dtype = parse_data_type("uint8") await g.create_array(name="a", shape=(1,), attributes={"key": "a"}, dtype=dtype) g1 = await g.create_group(name="g1", attributes={"key": "g1"}) await g1.create_group(name="g2", attributes={"key": "g2"}) diff --git a/tests/test_metadata/test_dtype.py b/tests/test_metadata/test_dtype.py index 8a1bcdedd1..ee19cdf845 100644 --- a/tests/test_metadata/test_dtype.py +++ b/tests/test_metadata/test_dtype.py @@ -5,15 +5,19 @@ import numpy as np import pytest -from zarr.core.metadata.dtype import ( +from zarr.core.dtype import ( DTYPE, + DTypeWrapper, + VariableLengthString, + data_type_registry, +) +from zarr.core.dtype._numpy import ( Bool, Complex64, Complex128, - DataTypeRegistry, DateTime64, - DTypeWrapper, FixedLengthAsciiString, + FixedLengthBytes, FixedLengthUnicodeString, Float16, Float32, @@ -22,15 +26,14 @@ Int16, Int32, Int64, - StaticRawBytes, Structured, UInt8, UInt16, UInt32, UInt64, - VariableLengthString, - data_type_registry, ) +from zarr.core.dtype.common import DataTypeValidationError +from zarr.core.dtype.registry import DataTypeRegistry @pytest.fixture @@ -65,7 +68,7 @@ def dtype_registry() -> DataTypeRegistry: (Complex128, "complex128"), (FixedLengthUnicodeString, "U"), (FixedLengthAsciiString, "S"), - (StaticRawBytes, "V"), + (FixedLengthBytes, "V"), (VariableLengthString, VLEN_STRING_CODE), (Structured, np.dtype([("a", np.float64), ("b", np.int8)])), (DateTime64, "datetime64[s]"), @@ -79,23 +82,23 @@ def test_wrap(wrapper_cls: type[DTypeWrapper[Any, Any]], np_dtype: np.dtype | st """ dt = np.dtype(np_dtype) assert wrapper_cls.dtype_cls is type(dt) - wrapped = wrapper_cls.wrap(dt) + wrapped = wrapper_cls.from_dtype(dt) - with pytest.raises(TypeError, match="Invalid dtype"): - wrapper_cls.wrap("not a dtype") + with pytest.raises(DataTypeValidationError, match="Invalid dtype"): + wrapper_cls.from_dtype("not a dtype") assert 
isinstance(wrapped, wrapper_cls) - assert wrapped.unwrap() == dt + assert wrapped.to_dtype() == dt @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) def test_dict_serialization(wrapper_cls: DTYPE) -> None: if issubclass(wrapper_cls, Structured): - instance = wrapper_cls(fields=((("a", Bool(), 0),))) + instance = wrapper_cls(fields=((("a", Bool()),))) else: instance = wrapper_cls() as_dict = instance.to_dict() - assert wrapper_cls.from_dict(data=as_dict.get("configuration", {})) == instance + assert wrapper_cls.from_dict(as_dict) == instance @pytest.mark.parametrize( @@ -116,10 +119,10 @@ def test_dict_serialization(wrapper_cls: DTYPE) -> None: (Complex64(), np.complex64(0)), (Complex128(), np.complex128(0)), (FixedLengthAsciiString(length=3), np.bytes_(b"")), - (StaticRawBytes(length=3), np.void(b"\x00\x00\x00")), + (FixedLengthBytes(length=3), np.void(b"\x00\x00\x00")), (FixedLengthUnicodeString(length=3), np.str_("")), ( - Structured(fields=(("a", Float64(), 0), ("b", Int8(), 8))), + Structured(fields=(("a", Float64()), ("b", Int8()))), np.array([0], dtype=[("a", np.float64), ("b", np.int8)])[0], ), (VariableLengthString(), ""), @@ -154,7 +157,7 @@ def test_default_value(wrapper: type[DTypeWrapper[Any, Any]], expected_default: (Complex64(), np.complex64(42.0 + 1.0j), (42.0, 1.0)), (Complex128(), np.complex128(42.0 + 1.0j), (42.0, 1.0)), (FixedLengthAsciiString(length=4), np.bytes_(b"test"), "dGVzdA=="), - (StaticRawBytes(length=4), np.void(b"test"), "dGVzdA=="), + (FixedLengthBytes(length=4), np.void(b"test"), "dGVzdA=="), (FixedLengthUnicodeString(length=4), np.str_("test"), "test"), (VariableLengthString(), "test", "test"), (DateTime64(unit="s"), np.datetime64("2021-01-01T00:00:00", "s"), 1609459200), @@ -187,7 +190,7 @@ def test_to_json_value_v2( (Complex64(), (42.0, 1.0), np.complex64(42.0 + 1.0j)), (Complex128(), (42.0, 1.0), np.complex128(42.0 + 1.0j)), (FixedLengthAsciiString(length=4), "dGVzdA==", np.bytes_(b"test")), - (StaticRawBytes(length=4), 
"dGVzdA==", np.void(b"test")), + (FixedLengthBytes(length=4), "dGVzdA==", np.void(b"test")), (FixedLengthUnicodeString(length=4), "test", np.str_("test")), (VariableLengthString(), "test", "test"), (DateTime64(unit="s"), 1609459200, np.datetime64("2021-01-01T00:00:00", "s")), @@ -208,8 +211,8 @@ def test_register(dtype_registry: DataTypeRegistry) -> None: """ Test that registering a dtype in a data type registry works. """ - dtype_registry.register(Bool) - assert dtype_registry.get("bool") == Bool + dtype_registry.register(Bool._zarr_v3_name, Bool) + assert dtype_registry.get(Bool._zarr_v3_name) == Bool assert isinstance(dtype_registry.match_dtype(np.dtype("bool")), Bool) @staticmethod @@ -217,13 +220,13 @@ def test_override(dtype_registry: DataTypeRegistry) -> None: """ Test that registering a new dtype with the same name works (overriding the previous one). """ - dtype_registry.register(Bool) + dtype_registry.register(Bool._zarr_v3_name, Bool) class NewBool(Bool): def default_value(self) -> np.bool_: return np.True_ - dtype_registry.register(NewBool) + dtype_registry.register(NewBool._zarr_v3_name, NewBool) assert isinstance(dtype_registry.match_dtype(np.dtype("bool")), NewBool) @staticmethod @@ -236,7 +239,7 @@ def test_match_dtype( """ Test that match_dtype resolves a numpy dtype into an instance of the correspond wrapper for that dtype. """ - dtype_registry.register(wrapper_cls) + dtype_registry.register(wrapper_cls._zarr_v3_name, wrapper_cls) assert isinstance(dtype_registry.match_dtype(np.dtype(dtype_str)), wrapper_cls) @staticmethod @@ -260,8 +263,8 @@ def test_registered_dtypes(wrapper_cls: DTypeWrapper[Any, Any]) -> None: Test that the registered dtypes can be retrieved from the registry. 
""" if issubclass(wrapper_cls, Structured): - instance = wrapper_cls(fields=((("a", Bool(), 0),))) + instance = wrapper_cls(fields=((("a", Bool()),))) else: instance = wrapper_cls() - assert data_type_registry.match_dtype(instance.unwrap()) == instance + assert data_type_registry.match_dtype(instance.to_dtype()) == instance diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index 1c5ddd6f9a..2eec9a6c74 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -9,9 +9,9 @@ import zarr.storage from zarr.core.buffer import cpu from zarr.core.buffer.core import default_buffer_prototype +from zarr.core.dtype._numpy import Float32, Float64, Int16 from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.metadata import ArrayV2Metadata -from zarr.core.metadata.dtype import Float32, Float64, Int16 from zarr.core.metadata.v2 import parse_zarr_format if TYPE_CHECKING: diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index ea59496280..23f28ab097 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -11,8 +11,10 @@ from zarr.core.buffer import default_buffer_prototype from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config +from zarr.core.dtype import get_data_type_from_numpy +from zarr.core.dtype._numpy import DateTime64 +from zarr.core.dtype.common import complex_from_json from zarr.core.group import GroupMetadata, parse_node_type -from zarr.core.metadata.dtype import DateTime64, complex_from_json, get_data_type_from_numpy from zarr.core.metadata.v3 import ( ArrayV3Metadata, parse_dimension_names, @@ -127,7 +129,7 @@ def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: """ zarr_format = 3 dtype = get_data_type_from_numpy(dtype_str) - expected = dtype.unwrap().type(complex(*fill_value)) + expected = dtype.to_dtype().type(complex(*fill_value)) observed = 
dtype.from_json_value(fill_value, zarr_format=zarr_format) assert observed == expected assert dtype.to_json_value(observed, zarr_format=zarr_format) == tuple(fill_value) From e8fd72cbf40ff51c937b81d95321e8de8e57230d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 13 Mar 2025 14:10:22 +0100 Subject: [PATCH 022/130] start design doc --- docs/user-guide/data_types.rst | 156 +++++++++++++++++++++++++++++++++ docs/user-guide/index.rst | 1 + src/zarr/core/dtype/_numpy.py | 6 +- src/zarr/core/dtype/wrapper.py | 41 ++++----- 4 files changed, 181 insertions(+), 23 deletions(-) create mode 100644 docs/user-guide/data_types.rst diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst new file mode 100644 index 0000000000..19095e1851 --- /dev/null +++ b/docs/user-guide/data_types.rst @@ -0,0 +1,156 @@ +Data types +========== + +Zarr's data type model +---------------------- + +Every Zarr array has a "data type", which defines the meaning and physical layout of the +array's elements. Zarr is heavily influenced by `NumPy `_, and +Zarr arrays can use many of the same data types as numpy arrays:: + >>> import zarr + >>> import numpy as np + >>> zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) + >>> z + + +But Zarr data types and Numpy data types are also very different in one key respect: +Zarr arrays are designed to be persisted to storage and later read, possibly by Zarr implementations in different programming languages. +So in addition to defining a memory layout for array elements, each Zarr data type defines a procedure for +reading and writing that data type to Zarr array metadata, and also reading and writing **instances** of that data type to +array metadata. + +Data types in Zarr version 2 +----------------------------- + +Version 2 of the Zarr format defined its data types relative to `Numpy's data types `_, and added a few non-Numpy data types as well. 
+Thus the JSON identifer for a Numpy-compatible data type is just the Numpy ``str`` attribute of that dtype: + + >>> import zarr + >>> import numpy as np + >>> import json + >>> np_dtype = np.dtype('int64') + >>> z = zarr.create_array(shape=(1,), dtype=np_dtype, zarr_format=2) + >>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] + >>> assert dtype_meta == np_dtype.str # True + >>> dtype_meta + , "configuration": {...}}`` + +Data types in Zarr-Python +------------------------- + +Zarr-Python supports two different Zarr formats, and those two formats specify data types in rather different ways: +data types in Zarr version 2 are encoded as Numpy-compatible strings, while data types in Zarr version 3 are encoded as either strings or ``JSON`` objects, +and the Zarr V3 data types don't have any associated endianness information, unlike Zarr V2 data types. + +If that wasn't enough, we want Zarr-Python to support data types beyond what's available in Numpy. So it's crucial that we have a +model of array data types that can adapt to the differences between Zarr V2 and V3 and doesn't over-fit to Numpy. + +Here are the operations we need to perform on data types in Zarr-Python: + +* Round-trip native data types to fields in array metadata documents. + For example, the Numpy data type ``np.dtype('>i2')`` should be saved as ``{..., "dtype" : ">i2"}`` in Zarr V2 metadata. + + In Zarr V3 metadata, the same Numpy data type would be saved as ``{..., "data_type": "int16", "codecs": [..., {"name": "bytes", "configuration": {"endian": "big"}, ...]}`` + +* Define a default fill value. This is not mandated by the Zarr specifications, but it's convenient for users + to have a useful default. For numeric types like integers and floats the default can be statically set to 0, but for + parametric data types like fixed-length strings the default can only be generated after the data type has been parametrized at runtime. 
+ +* Round-trip scalars to the ``fill_value`` field in Zarr V2 and V3 array metadata documents. The Zarr V2 and V3 specifications + define how scalars of each data type should be stored as JSON in array metadata documents, and in principle each data type + can define this encoding separately. + +* Do all of the above for *user-defined data types*. Zarr-Python should support data types added as extensions,so we cannot + hard-code the list of data types. We need to ensure that users can easily (or easily enough) define a python object + that models their custom data type and register this object with Zarr-Python, so that the above operations all succeed for their + custom data type. + +To achieve these goals, Zarr Python uses a class called :class:`zarr.core.dtype.DTypeWrapper` to wrap native data types. Each data type +supported by Zarr Python is modeled by a subclass of `DTypeWrapper`, which has the following structure: + +(attribute) ``dtype_cls`` +^^^^^^^^^^^^^ +The ``dtype_cls`` attribute is a **class variable** that is bound to a class that can produce +an instance of a native data type. For example, on the ``DTypeWrapper`` used to model the boolean +data type, the ``dtype_cls`` attribute is bound to the numpy bool data type class: ``np.dtypes.BoolDType``. +This attribute is used when we need to create an instance of the native data type, for example when +defining a Numpy array that will contain Zarr data. + +It might seem odd that ``DTypeWrapper.dtype_cls`` binds to a *class* that produces a native data type instead of an instance of that native data type -- +why not have a ``DTypeWrapper.dtype`` attribute that binds to ``np.dtypes.BoolDType()``? The reason why ``DTypeWrapper`` +doesn't wrap a concrete data type instance is because data type instances may have endianness information, but Zarr V3 +data types do not. 
To model Zarr V3 data types, we need endianness to be an **instance variable** which is +defined when creating an instance of the ```DTypeWrapper``. Subclasses of ``DTypeWrapper`` that model data types with +byte order semantics thus have ``endianness`` as an instance variable, and this value can be set when creating an instance of the wrapper. + + +(attribute) ``_zarr_v3_name`` +^^^^^^^^^^^^^ +The ``_zarr_v3_name`` attribute encodes the canonical name for a data type for Zarr V3. For many data types these names +are defined in the `Zarr V3 specification https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#data-types`_ For nearly all of the +data types defined in Zarr V3, this name can be used to uniquely specify a data type. The one exception is the ``r*`` data type, +which is parametrized by a number of bits, and so may take the form ``r8``, ``r16``, ... etc. + +(class method) ``from_dtype(cls, dtype) -> Self`` +^^^^^^^^^ +This method defines a procedure for safely converting a native dtype instance into an instance of ``DTypeWrapper``. It should perform +validation of its input to ensure that the native dtype is an instance of the ``dtype_cls`` class attribute, for example. For some +data types, additional checks are needed -- in Numpy "structured" data types and "void" data types use the same class, with different properties. +A ``DTypeWrapper`` that wraps Numpy structured data types must do additional checks to ensure that the input ``dtype`` is actually a structured data type. +If input validation succeeds, this method will call ``_from_dtype_unsafe``. + +(class method) ``_from_dtype_unsafe(cls, dtype) -> Self`` +^^^^^^^^^^ +This method defines the procedure for converting a native data type instance, like ``np.dtype('uint8')``, +into a wrapper class instance. The ``unsafe`` prefix on the method name denotes that this method should not +perform any input validation. Input validation should be done by the routine that calls this method. 
+ +For many data types, creating the wrapper class takes no arguments and so this method can just return ``cls()``. +But for data types with runtime attributes like endianness or length (for fixed-size strings), this ``_from_dtype_unsafe`` +ensures that those attributes of ``dtype`` are mapped on to the correct parameters in the ``DTypeWrapper`` class constructor. + +(method) ``to_dtype(self) -> dtype`` +^^^^^^^ +This method produces a native data type consistent with the properties of the ``DTypeWrapper``. Together +with ``from_dtype``, this method allows round-trip conversion of a native data type in to a wrapper class and then out again. + +That is, for some ``DTypeWrapper`` class ``FooWrapper`` that wraps a native data type called ``foo``, ``FooWrapper.from_dtype(instance_of_foo).to_dtype() == instance_of_foo`` should be true. + +(method) ``to_dict(self) -> dict`` +^^^^^ +This method generates a JSON-serialiazable representation of the wrapped data type which can be stored in +Zarr metadata. + +(method) ``cast_value(self, value: object) -> scalar`` +^^^^^ +Cast a python object to an instance of the wrapped data type. This is used for generating the default +value associated with this data type. + + +(method) ``default_value(self) -> scalar`` +^^^^ +Return the default value for the wrapped data type. Zarr-Python uses this method to generate a default fill value +for an array when a user has not requested one. + +Why is this a method and not a static attribute? Although some data types +can have a static default value, parametrized data types like fixed-length strings or structured data types cannot. For these data types, +a default value must be calculated based on the attributes of the wrapped data type. 
+ +(method) `` + + + diff --git a/docs/user-guide/index.rst b/docs/user-guide/index.rst index c50713332b..ea34ac2561 100644 --- a/docs/user-guide/index.rst +++ b/docs/user-guide/index.rst @@ -8,6 +8,7 @@ User guide installation arrays + data_types groups attributes storage diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index b98cc100e3..362f7f361c 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -569,7 +569,7 @@ def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[np.dtypes.VoidDType return super().check_dtype(dtype) and dtype.fields is None @classmethod - def check_json(cls, data: dict[str, JSON]) -> TypeGuard[dict[str, JSON]]: + def check_dict(cls, data: dict[str, JSON]) -> TypeGuard[dict[str, JSON]]: # Overriding the base class implementation because the r* dtype # does not have a name that will can appear in array metadata # Instead, array metadata will contain names like "r8", "r16", etc @@ -787,7 +787,7 @@ def to_dict(self) -> dict[str, JSON]: return base_dict @classmethod - def check_json(cls, data: JSON) -> bool: + def check_dict(cls, data: JSON) -> bool: return ( isinstance(data, dict) and "name" in data @@ -797,7 +797,7 @@ def check_json(cls, data: JSON) -> bool: @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: - if cls.check_json(data): + if cls.check_dict(data): from zarr.core.dtype import get_data_type_from_dict fields = tuple( diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 002bd100e9..eecb1f2562 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -39,24 +39,6 @@ class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): dtype_cls: ClassVar[type[TDType]] # type: ignore[misc] _zarr_v3_name: ClassVar[str] - @classmethod - @abstractmethod - def _from_dtype_unsafe(cls: type[Self], dtype: TDType) -> Self: - """ - Wrap a native dtype without checking. 
- - Parameters - ---------- - dtype : TDType - The native dtype to wrap. - - Returns - ------- - Self - The wrapped dtype. - """ - raise NotImplementedError - @classmethod def from_dtype(cls: type[Self], dtype: TDType) -> Self: """ @@ -83,6 +65,25 @@ def from_dtype(cls: type[Self], dtype: TDType) -> Self: f"Invalid dtype: {dtype}. Expected an instance of {cls.dtype_cls}." ) + + @classmethod + @abstractmethod + def _from_dtype_unsafe(cls: type[Self], dtype: TDType) -> Self: + """ + Wrap a native dtype without checking. + + Parameters + ---------- + dtype : TDType + The native dtype to wrap. + + Returns + ------- + Self + The wrapped dtype. + """ + raise NotImplementedError + @abstractmethod def to_dtype(self: Self) -> TDType: """ @@ -158,7 +159,7 @@ def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[TDType]: return type(dtype) is cls.dtype_cls @classmethod - def check_json(cls: type[Self], data: dict[str, JSON]) -> TypeGuard[dict[str, JSON]]: + def check_dict(cls: type[Self], data: dict[str, JSON]) -> TypeGuard[dict[str, JSON]]: """ Check that a JSON representation of a data type matches the dtype_cls class attribute. Used as a type guard. This base implementation checks that the input is a dictionary, @@ -192,7 +193,7 @@ def from_dict(cls: type[Self], data: dict[str, JSON]) -> Self: Self The wrapped data type. 
""" - if cls.check_json(data): + if cls.check_dict(data): return cls._from_json_unsafe(data) raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") From b22f324bfa787e336e8afd05834fe691939f2a91 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 13 Mar 2025 14:11:03 +0100 Subject: [PATCH 023/130] more design doc --- docs/user-guide/data_types.rst | 90 +++++++++++++++++----------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 19095e1851..7a5825bf2f 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -4,19 +4,19 @@ Data types Zarr's data type model ---------------------- -Every Zarr array has a "data type", which defines the meaning and physical layout of the +Every Zarr array has a "data type", which defines the meaning and physical layout of the array's elements. Zarr is heavily influenced by `NumPy `_, and Zarr arrays can use many of the same data types as numpy arrays:: >>> import zarr >>> import numpy as np >>> zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) >>> z - + -But Zarr data types and Numpy data types are also very different in one key respect: -Zarr arrays are designed to be persisted to storage and later read, possibly by Zarr implementations in different programming languages. -So in addition to defining a memory layout for array elements, each Zarr data type defines a procedure for -reading and writing that data type to Zarr array metadata, and also reading and writing **instances** of that data type to +But Zarr data types and Numpy data types are also very different in one key respect: +Zarr arrays are designed to be persisted to storage and later read, possibly by Zarr implementations in different programming languages. 
+So in addition to defining a memory layout for array elements, each Zarr data type defines a procedure for +reading and writing that data type to Zarr array metadata, and also reading and writing **instances** of that data type to array metadata. Data types in Zarr version 2 @@ -35,11 +35,11 @@ Thus the JSON identifer for a Numpy-compatible data type is just the Numpy ``str >>> dtype_meta i2')`` should be saved as ``{..., "dtype" : ">i2"}`` in Zarr V2 metadata. - + For example, the Numpy data type ``np.dtype('>i2')`` should be saved as ``{..., "dtype" : ">i2"}`` in Zarr V2 metadata. + In Zarr V3 metadata, the same Numpy data type would be saved as ``{..., "data_type": "int16", "codecs": [..., {"name": "bytes", "configuration": {"endian": "big"}, ...]}`` -* Define a default fill value. This is not mandated by the Zarr specifications, but it's convenient for users - to have a useful default. For numeric types like integers and floats the default can be statically set to 0, but for +* Define a default fill value. This is not mandated by the Zarr specifications, but it's convenient for users + to have a useful default. For numeric types like integers and floats the default can be statically set to 0, but for parametric data types like fixed-length strings the default can only be generated after the data type has been parametrized at runtime. * Round-trip scalars to the ``fill_value`` field in Zarr V2 and V3 array metadata documents. The Zarr V2 and V3 specifications define how scalars of each data type should be stored as JSON in array metadata documents, and in principle each data type can define this encoding separately. -* Do all of the above for *user-defined data types*. Zarr-Python should support data types added as extensions,so we cannot - hard-code the list of data types. 
We need to ensure that users can easily (or easily enough) define a python object - that models their custom data type and register this object with Zarr-Python, so that the above operations all succeed for their +* Do all of the above for *user-defined data types*. Zarr-Python should support data types added as extensions,so we cannot + hard-code the list of data types. We need to ensure that users can easily (or easily enough) define a python object + that models their custom data type and register this object with Zarr-Python, so that the above operations all succeed for their custom data type. -To achieve these goals, Zarr Python uses a class called :class:`zarr.core.dtype.DTypeWrapper` to wrap native data types. Each data type -supported by Zarr Python is modeled by a subclass of `DTypeWrapper`, which has the following structure: +To achieve these goals, Zarr Python uses a class called :class:`zarr.core.dtype.DTypeWrapper` to wrap native data types. Each data type +supported by Zarr Python is modeled by a subclass of `DTypeWrapper`, which has the following structure: (attribute) ``dtype_cls`` ^^^^^^^^^^^^^ The ``dtype_cls`` attribute is a **class variable** that is bound to a class that can produce -an instance of a native data type. For example, on the ``DTypeWrapper`` used to model the boolean -data type, the ``dtype_cls`` attribute is bound to the numpy bool data type class: ``np.dtypes.BoolDType``. -This attribute is used when we need to create an instance of the native data type, for example when -defining a Numpy array that will contain Zarr data. +an instance of a native data type. For example, on the ``DTypeWrapper`` used to model the boolean +data type, the ``dtype_cls`` attribute is bound to the numpy bool data type class: ``np.dtypes.BoolDType``. +This attribute is used when we need to create an instance of the native data type, for example when +defining a Numpy array that will contain Zarr data. 
-It might seem odd that ``DTypeWrapper.dtype_cls`` binds to a *class* that produces a native data type instead of an instance of that native data type -- +It might seem odd that ``DTypeWrapper.dtype_cls`` binds to a *class* that produces a native data type instead of an instance of that native data type -- why not have a ``DTypeWrapper.dtype`` attribute that binds to ``np.dtypes.BoolDType()``? The reason why ``DTypeWrapper`` -doesn't wrap a concrete data type instance is because data type instances may have endianness information, but Zarr V3 -data types do not. To model Zarr V3 data types, we need endianness to be an **instance variable** which is -defined when creating an instance of the ```DTypeWrapper``. Subclasses of ``DTypeWrapper`` that model data types with +doesn't wrap a concrete data type instance is because data type instances may have endianness information, but Zarr V3 +data types do not. To model Zarr V3 data types, we need endianness to be an **instance variable** which is +defined when creating an instance of the ```DTypeWrapper``. Subclasses of ``DTypeWrapper`` that model data types with byte order semantics thus have ``endianness`` as an instance variable, and this value can be set when creating an instance of the wrapper. (attribute) ``_zarr_v3_name`` ^^^^^^^^^^^^^ -The ``_zarr_v3_name`` attribute encodes the canonical name for a data type for Zarr V3. For many data types these names +The ``_zarr_v3_name`` attribute encodes the canonical name for a data type for Zarr V3. For many data types these names are defined in the `Zarr V3 specification https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#data-types`_ For nearly all of the data types defined in Zarr V3, this name can be used to uniquely specify a data type. The one exception is the ``r*`` data type, -which is parametrized by a number of bits, and so may take the form ``r8``, ``r16``, ... etc. 
+which is parametrized by a number of bits, and so may take the form ``r8``, ``r16``, ... etc. (class method) ``from_dtype(cls, dtype) -> Self`` ^^^^^^^^^ This method defines a procedure for safely converting a native dtype instance into an instance of ``DTypeWrapper``. It should perform -validation of its input to ensure that the native dtype is an instance of the ``dtype_cls`` class attribute, for example. For some -data types, additional checks are needed -- in Numpy "structured" data types and "void" data types use the same class, with different properties. +validation of its input to ensure that the native dtype is an instance of the ``dtype_cls`` class attribute, for example. For some +data types, additional checks are needed -- in Numpy "structured" data types and "void" data types use the same class, with different properties. A ``DTypeWrapper`` that wraps Numpy structured data types must do additional checks to ensure that the input ``dtype`` is actually a structured data type. -If input validation succeeds, this method will call ``_from_dtype_unsafe``. +If input validation succeeds, this method will call ``_from_dtype_unsafe``. (class method) ``_from_dtype_unsafe(cls, dtype) -> Self`` ^^^^^^^^^^ This method defines the procedure for converting a native data type instance, like ``np.dtype('uint8')``, -into a wrapper class instance. The ``unsafe`` prefix on the method name denotes that this method should not -perform any input validation. Input validation should be done by the routine that calls this method. +into a wrapper class instance. The ``unsafe`` prefix on the method name denotes that this method should not +perform any input validation. Input validation should be done by the routine that calls this method. For many data types, creating the wrapper class takes no arguments and so this method can just return ``cls()``. 
-But for data types with runtime attributes like endianness or length (for fixed-size strings), this ``_from_dtype_unsafe`` +But for data types with runtime attributes like endianness or length (for fixed-size strings), this ``_from_dtype_unsafe`` ensures that those attributes of ``dtype`` are mapped on to the correct parameters in the ``DTypeWrapper`` class constructor. (method) ``to_dtype(self) -> dtype`` ^^^^^^^ -This method produces a native data type consistent with the properties of the ``DTypeWrapper``. Together +This method produces a native data type consistent with the properties of the ``DTypeWrapper``. Together with ``from_dtype``, this method allows round-trip conversion of a native data type in to a wrapper class and then out again. That is, for some ``DTypeWrapper`` class ``FooWrapper`` that wraps a native data type called ``foo``, ``FooWrapper.from_dtype(instance_of_foo).to_dtype() == instance_of_foo`` should be true. -(method) ``to_dict(self) -> dict`` +(method) ``to_dict(self) -> dict`` ^^^^^ -This method generates a JSON-serialiazable representation of the wrapped data type which can be stored in +This method generates a JSON-serialiazable representation of the wrapped data type which can be stored in Zarr metadata. (method) ``cast_value(self, value: object) -> scalar`` ^^^^^ -Cast a python object to an instance of the wrapped data type. This is used for generating the default +Cast a python object to an instance of the wrapped data type. This is used for generating the default value associated with this data type. (method) ``default_value(self) -> scalar`` ^^^^ -Return the default value for the wrapped data type. Zarr-Python uses this method to generate a default fill value -for an array when a user has not requested one. +Return the default value for the wrapped data type. Zarr-Python uses this method to generate a default fill value +for an array when a user has not requested one. -Why is this a method and not a static attribute? 
Although some data types +Why is this a method and not a static attribute? Although some data types can have a static default value, parametrized data types like fixed-length strings or structured data types cannot. For these data types, a default value must be calculated based on the attributes of the wrapped data type. -(method) `` +(method) ``check_dtype(cls, dtype)`` From b7a231e08c978ab0f229957fc6a52cec8aca11a2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 13 Mar 2025 16:27:06 +0100 Subject: [PATCH 024/130] update docs --- docs/user-guide/data_types.rst | 64 ++++++++++++++++++++++++---------- src/zarr/core/dtype/wrapper.py | 29 ++++++++------- 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 7a5825bf2f..83b9870755 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -6,24 +6,24 @@ Zarr's data type model Every Zarr array has a "data type", which defines the meaning and physical layout of the array's elements. Zarr is heavily influenced by `NumPy `_, and -Zarr arrays can use many of the same data types as numpy arrays:: +Zarr-Python supports creating arrays with Numpy data types:: >>> import zarr >>> import numpy as np >>> zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) >>> z -But Zarr data types and Numpy data types are also very different in one key respect: -Zarr arrays are designed to be persisted to storage and later read, possibly by Zarr implementations in different programming languages. -So in addition to defining a memory layout for array elements, each Zarr data type defines a procedure for +But Zarr data types and Numpy data types are also very different: +Unlike Numpy arrays, Zarr arrays are designed to be persisted to storage and read by Zarr implementations in different programming languages. 
+To ensure that the data type can be interpreted correctly when reading an array, each Zarr data type defines a procedure for reading and writing that data type to Zarr array metadata, and also reading and writing **instances** of that data type to -array metadata. +array metadata, and these serialization procedures depend on the Zarr format. Data types in Zarr version 2 ----------------------------- Version 2 of the Zarr format defined its data types relative to `Numpy's data types `_, and added a few non-Numpy data types as well. -Thus the JSON identifer for a Numpy-compatible data type is just the Numpy ``str`` attribute of that dtype: +Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``str`` attribute of that dtype: >>> import zarr >>> import numpy as np @@ -113,16 +113,6 @@ data types, additional checks are needed -- in Numpy "structured" data types and A ``DTypeWrapper`` that wraps Numpy structured data types must do additional checks to ensure that the input ``dtype`` is actually a structured data type. If input validation succeeds, this method will call ``_from_dtype_unsafe``. -(class method) ``_from_dtype_unsafe(cls, dtype) -> Self`` -^^^^^^^^^^ -This method defines the procedure for converting a native data type instance, like ``np.dtype('uint8')``, -into a wrapper class instance. The ``unsafe`` prefix on the method name denotes that this method should not -perform any input validation. Input validation should be done by the routine that calls this method. - -For many data types, creating the wrapper class takes no arguments and so this method can just return ``cls()``. -But for data types with runtime attributes like endianness or length (for fixed-size strings), this ``_from_dtype_unsafe`` -ensures that those attributes of ``dtype`` are mapped on to the correct parameters in the ``DTypeWrapper`` class constructor. 
- (method) ``to_dtype(self) -> dtype`` ^^^^^^^ This method produces a native data type consistent with the properties of the ``DTypeWrapper``. Together @@ -137,20 +127,56 @@ Zarr metadata. (method) ``cast_value(self, value: object) -> scalar`` ^^^^^ -Cast a python object to an instance of the wrapped data type. This is used for generating the default +This method converts a python object to an instance of the wrapped data type. It is used for generating the default value associated with this data type. (method) ``default_value(self) -> scalar`` ^^^^ -Return the default value for the wrapped data type. Zarr-Python uses this method to generate a default fill value +This method returns the default value for the wrapped data type. Zarr-Python uses this method to generate a default fill value for an array when a user has not requested one. Why is this a method and not a static attribute? Although some data types can have a static default value, parametrized data types like fixed-length strings or structured data types cannot. For these data types, a default value must be calculated based on the attributes of the wrapped data type. -(method) ``check_dtype(cls, dtype)`` +(class method) ``check_dtype(cls, dtype) -> bool`` +^^^^^ +This class method checks if a native dtype is compatible with the ``DTypeWrapper`` class. It returns ``True`` +if ``dtype`` is compatible with the wrapper class, and ``False`` otherwise. For many data types, this check is as simple +as checking that ``cls.dtype_cls`` matches ``type(dtype)``, i.e. checking that the data type class wrapped +by the ``DTypeWrapper`` is the same as the class of ``dtype``. But there are some data types where this check alone is not sufficient, +in which case this method is overridden so that additional properties of ``dtype`` can be inspected and compared with +the expectations of ``cls``. 
+ +(class method) ``from_dict(cls, dtype) -> Self`` +^^^^ +This class method creates a ``DTypeWrapper`` from an appropriately structured dictionary. The default +implementation first checks that the dictionary has the correct structure, and then uses its data +to instantiate the ``DTypeWrapper`` instance. + +(method) ``to_dict(self) -> dict[str, JSON]`` +^^^ +Returns a dictionary form of the wrapped data type. This is used prior to writing array metadata. +(class method) ``get_name(self, zarr_format: Literal[2, 3]) -> str`` +^^^^ +This method generates a name for the wrapped data type, depending on the Zarr format. If ``zarr_format`` is +2 and the wrapped data type is a Numpy data type, then the Numpy string representation of that data type is returned. +If ``zarr_format`` is 3, then the Zarr V3 name for the wrapped data type is returned. For most data types +the Zarr V3 name will be stored as the ``_zarr_v3_name`` class attribute, but for parametric data types the +name must be computed at runtime based on the parameters of the data type. + + +(method) ``to_json_value(self, data: scalar, zarr_format: Literal[2, 3]) -> JSON`` +^^^ +This method converts a scalar instance of the data type into a JSON-serialiazable value. +For some data types like bool and integers this conversion is simple -- just return a JSON boolean +or number -- but other data types define a JSON serialization for scalars that is a bit more involved. +And this JSON serialization depends on the Zarr format. + +(method) ``from_json_value(self, data: JSON, zarr_format: Literal[2, 3]) -> scalar`` +^^^ +Convert a JSON-serialiazed scalar to a native scalar. This inverts the operation of ``to_json_value``. diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index eecb1f2562..dc3a0cc5d2 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -65,7 +65,6 @@ def from_dtype(cls: type[Self], dtype: TDType) -> Self: f"Invalid dtype: {dtype}. 
Expected an instance of {cls.dtype_cls}." ) - @classmethod @abstractmethod def _from_dtype_unsafe(cls: type[Self], dtype: TDType) -> Self: @@ -96,18 +95,6 @@ def to_dtype(self: Self) -> TDType: """ raise NotImplementedError - @abstractmethod - def to_dict(self) -> dict[str, JSON]: - """ - Convert the wrapped data type to a dictionary. - - Returns - ------- - dict[str, JSON] - The dictionary representation of the wrapped data type - """ - raise NotImplementedError - def cast_value(self: Self, value: object) -> TScalar: """ Cast a value to an instance of the scalar type. @@ -178,6 +165,18 @@ def check_dict(cls: type[Self], data: dict[str, JSON]) -> TypeGuard[dict[str, JS """ return "name" in data and data["name"] == cls._zarr_v3_name + @abstractmethod + def to_dict(self) -> dict[str, JSON]: + """ + Convert the wrapped data type to a dictionary. + + Returns + ------- + dict[str, JSON] + The dictionary representation of the wrapped data type + """ + raise NotImplementedError + @classmethod def from_dict(cls: type[Self], data: dict[str, JSON]) -> Self: """ @@ -194,11 +193,11 @@ def from_dict(cls: type[Self], data: dict[str, JSON]) -> Self: The wrapped data type. """ if cls.check_dict(data): - return cls._from_json_unsafe(data) + return cls._from_dict_unsafe(data) raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") @classmethod - def _from_json_unsafe(cls: type[Self], data: dict[str, JSON]) -> Self: + def _from_dict_unsafe(cls: type[Self], data: dict[str, JSON]) -> Self: """ Wrap a JSON representation of a data type. 
From 7dfcd0f6b6334f4e87a9769bc3e950791d498c70 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 13 Mar 2025 16:34:40 +0100 Subject: [PATCH 025/130] fix sphinx warnings --- docs/user-guide/data_types.rst | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 83b9870755..6132eb2376 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -7,11 +7,11 @@ Zarr's data type model Every Zarr array has a "data type", which defines the meaning and physical layout of the array's elements. Zarr is heavily influenced by `NumPy `_, and Zarr-Python supports creating arrays with Numpy data types:: - >>> import zarr - >>> import numpy as np - >>> zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) - >>> z - +>>> import zarr +>>> import numpy as np +>>> zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) +>>> z + But Zarr data types and Numpy data types are also very different: Unlike Numpy arrays, Zarr arrays are designed to be persisted to storage and read by Zarr implementations in different programming languages. @@ -36,8 +36,8 @@ Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``st `_, or "byte order", of the data type. Following Numpy's example, + Zarr version 2 data types associate each data type with an endianness where applicable. Zarr version 3 data types do not store endianness information. In addition to defining a representation of the data type itself (which in the example above was just a simple string ``"`_ For nearly all of the data types defined in Zarr V3, this name can be used to uniquely specify a data type. The one exception is the ``r*`` data type, which is parametrized by a number of bits, and so may take the form ``r8``, ``r16``, ... etc. 
(class method) ``from_dtype(cls, dtype) -> Self`` -^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This method defines a procedure for safely converting a native dtype instance into an instance of ``DTypeWrapper``. It should perform validation of its input to ensure that the native dtype is an instance of the ``dtype_cls`` class attribute, for example. For some data types, additional checks are needed -- in Numpy "structured" data types and "void" data types use the same class, with different properties. @@ -114,25 +114,25 @@ A ``DTypeWrapper`` that wraps Numpy structured data types must do additional che If input validation succeeds, this method will call ``_from_dtype_unsafe``. (method) ``to_dtype(self) -> dtype`` -^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This method produces a native data type consistent with the properties of the ``DTypeWrapper``. Together with ``from_dtype``, this method allows round-trip conversion of a native data type in to a wrapper class and then out again. That is, for some ``DTypeWrapper`` class ``FooWrapper`` that wraps a native data type called ``foo``, ``FooWrapper.from_dtype(instance_of_foo).to_dtype() == instance_of_foo`` should be true. (method) ``to_dict(self) -> dict`` -^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This method generates a JSON-serialiazable representation of the wrapped data type which can be stored in Zarr metadata. (method) ``cast_value(self, value: object) -> scalar`` -^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This method converts a python object to an instance of the wrapped data type. It is used for generating the default value associated with this data type. (method) ``default_value(self) -> scalar`` -^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This method returns the default value for the wrapped data type. Zarr-Python uses this method to generate a default fill value for an array when a user has not requested one. 
@@ -141,7 +141,7 @@ can have a static default value, parametrized data types like fixed-length strin a default value must be calculated based on the attributes of the wrapped data type. (class method) ``check_dtype(cls, dtype) -> bool`` -^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This class method checks if a native dtype is compatible with the ``DTypeWrapper`` class. It returns ``True`` if ``dtype`` is compatible with the wrapper class, and ``False`` otherwise. For many data types, this check is as simple as checking that ``cls.dtype_cls`` matches ``type(dtype)``, i.e. checking that the data type class wrapped @@ -150,17 +150,17 @@ in which case this method is overridden so that additional properties of ``dtype the expectations of ``cls``. (class method) ``from_dict(cls, dtype) -> Self`` -^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This class method creates a ``DTypeWrapper`` from an appropriately structured dictionary. The default implementation first checks that the dictionary has the correct structure, and then uses its data to instantiate the ``DTypeWrapper`` instance. (method) ``to_dict(self) -> dict[str, JSON]`` -^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Returns a dictionary form of the wrapped data type. This is used prior to writing array metadata. (class method) ``get_name(self, zarr_format: Literal[2, 3]) -> str`` -^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This method generates a name for the wrapped data type, depending on the Zarr format. If ``zarr_format`` is 2 and the wrapped data type is a Numpy data type, then the Numpy string representation of that data type is returned. If ``zarr_format`` is 3, then the Zarr V3 name for the wrapped data type is returned. For most data types @@ -169,14 +169,14 @@ name must be computed at runtime based on the parameters of the data type. 
(method) ``to_json_value(self, data: scalar, zarr_format: Literal[2, 3]) -> JSON`` -^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This method converts a scalar instance of the data type into a JSON-serialiazable value. For some data types like bool and integers this conversion is simple -- just return a JSON boolean or number -- but other data types define a JSON serialization for scalars that is a bit more involved. And this JSON serialization depends on the Zarr format. (method) ``from_json_value(self, data: JSON, zarr_format: Literal[2, 3]) -> scalar`` -^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Convert a JSON-serialiazed scalar to a native scalar. This inverts the operation of ``to_json_value``. From 706e6b636cb2428aa13a773b2099b0d0ed405c0c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 13 Mar 2025 16:59:52 +0100 Subject: [PATCH 026/130] tweak docs --- docs/user-guide/data_types.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 6132eb2376..94e05de62d 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -7,17 +7,17 @@ Zarr's data type model Every Zarr array has a "data type", which defines the meaning and physical layout of the array's elements. Zarr is heavily influenced by `NumPy `_, and Zarr-Python supports creating arrays with Numpy data types:: ->>> import zarr ->>> import numpy as np ->>> zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) ->>> z - -But Zarr data types and Numpy data types are also very different: + >>> import zarr + >>> import numpy as np + >>> zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) + >>> z + + Unlike Numpy arrays, Zarr arrays are designed to be persisted to storage and read by Zarr implementations in different programming languages. 
-To ensure that the data type can be interpreted correctly when reading an array, each Zarr data type defines a procedure for -reading and writing that data type to Zarr array metadata, and also reading and writing **instances** of that data type to -array metadata, and these serialization procedures depend on the Zarr format. +This means Zarr data types must be interpreted correctly when clients read an array. So each Zarr data type defines a procedure for +encoding / decoding that data type to / from Zarr array metadata, and also encoding / decoding **instances** of that data type to / from +array metadata. These serialization procedures depend on the Zarr format. Data types in Zarr version 2 ----------------------------- @@ -56,7 +56,7 @@ Zarr-Python supports two different Zarr formats, and those two formats specify d data types in Zarr version 2 are encoded as Numpy-compatible strings, while data types in Zarr version 3 are encoded as either strings or ``JSON`` objects, and the Zarr V3 data types don't have any associated endianness information, unlike Zarr V2 data types. -If that wasn't enough, we want Zarr-Python to support data types beyond what's available in Numpy. So it's crucial that we have a +We also want Zarr-Python to support data types beyond what's available in Numpy. So it's crucial that we have a model of array data types that can adapt to the differences between Zarr V2 and V3 and doesn't over-fit to Numpy. 
Here are the operations we need to perform on data types in Zarr-Python: From 8fbf67347d41d584226652637eccc0e1cd000333 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 13 Mar 2025 17:05:49 +0100 Subject: [PATCH 027/130] info about v3 data types --- docs/user-guide/data_types.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 94e05de62d..2c6a98753c 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -46,8 +46,9 @@ as are floats, with the caveat that `NaN`, positive infinity, and negative infin Data types in Zarr version 3 ---------------------------- +* Data type names are different -- Zarr V2 represented the 16 bit unsigned integer data type as ``>i2``; Zarr V3 represents the same data type as ``int16``. * No endianness -* Data type can be encoded as a string or a ``JSON`` object with the structure ``{"name": , "configuration": {...}}`` +* A data type can be encoded in metadata as a string or a ``JSON`` object with the structure ``{"name": , "configuration": {...}}`` Data types in Zarr-Python ------------------------- From e9aff64055aafb6b833b126bdce5dfabed5ed69f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 13 Mar 2025 17:12:45 +0100 Subject: [PATCH 028/130] adjust note --- docs/user-guide/data_types.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 2c6a98753c..8fcfaac794 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -37,7 +37,7 @@ Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``st .. note:: The ``<`` character in the data type metadata encodes the `endianness `_, or "byte order", of the data type. Following Numpy's example, - Zarr version 2 data types associate each data type with an endianness where applicable. 
Zarr version 3 data types do not store endianness information. + in Zarr version 2 each data type has an endianness where applicable. However, Zarr version 3 data types do not store endianness information. In addition to defining a representation of the data type itself (which in the example above was just a simple string ``" Date: Thu, 13 Mar 2025 17:41:56 +0100 Subject: [PATCH 029/130] fix: use unparametrized types in direct assignment --- src/zarr/core/dtype/_numpy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index 362f7f361c..caf46bb216 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -524,7 +524,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: @dataclass(frozen=True, kw_only=True) class FixedLengthBytes(DTypeWrapper[np.dtypes.VoidDType[Any], np.void]): - dtype_cls = np.dtypes.VoidDType[Any] + dtype_cls = np.dtypes.VoidDType _zarr_v3_name = "r*" item_size_bits: ClassVar[int] = 8 length: int = 1 @@ -591,8 +591,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: @dataclass(frozen=True, kw_only=True) class FixedLengthUnicodeString(DTypeWrapper[np.dtypes.StrDType[int], np.str_]): - dtype_cls = np.dtypes.StrDType[int] - _zarr_v3_name = "numpy.static_unicode_string" + dtype_cls = np.dtypes.StrDType + _zarr_v3_name = "numpy.fixed_length_unicode_string" item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point endianness: Endianness | None = "native" length: int = 1 From 60cac0496b353244194e483d661af7934c059fdd Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 17 Mar 2025 10:04:13 +0100 Subject: [PATCH 030/130] start fixing config --- src/zarr/core/array.py | 19 +++++++---------- src/zarr/core/config.py | 39 +++++++++++++---------------------- src/zarr/core/dtype/_numpy.py | 4 ++-- 3 files changed, 23 insertions(+), 39 deletions(-) diff --git 
a/src/zarr/core/array.py b/src/zarr/core/array.py index a060bcbfae..465a2b6cc8 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -70,7 +70,6 @@ from zarr.core.dtype import ( DTypeWrapper, FixedLengthAsciiString, - FixedLengthUnicodeString, VariableLengthString, parse_data_type, ) @@ -4248,19 +4247,15 @@ def _get_default_chunk_encoding_v2( """ Get the default chunk encoding for Zarr format 2 arrays, given a dtype """ - from numcodecs import VLenBytes as numcodecs_VLenBytes - from numcodecs import VLenUTF8 as numcodecs_VLenUTF8 - from numcodecs import Zstd as numcodecs_zstd - - if isinstance(dtype, VariableLengthString | FixedLengthUnicodeString): - filters = (numcodecs_VLenUTF8(),) - elif isinstance(dtype, FixedLengthAsciiString): - filters = (numcodecs_VLenBytes(),) + if dtype._zarr_v3_name in zarr_config.get("array.v2_default_filters"): + filters = zarr_config.get(f"array.v2_default_filters.{dtype._zarr_v3_name}") else: - filters = None - - compressor = numcodecs_zstd(level=0, checksum=False) + filters = zarr_config.get("array.v2_default_filters.default") + if dtype._zarr_v3_name in zarr_config.get("array.v2_default_compressor"): + compressor = zarr_config.get(f"array.v2_default_compressor.{dtype._zarr_v3_name}") + else: + compressor = zarr_config.get("array.v2_default_compressor.default") return filters, compressor diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 98252f572c..71c311d7d5 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -36,6 +36,8 @@ if TYPE_CHECKING: from donfig.config_obj import ConfigSet +from collections import defaultdict + class BadConfigError(ValueError): _msg = "bad Config: %r" @@ -77,37 +79,24 @@ def enable_gpu(self) -> ConfigSet: "array": { "order": "C", "write_empty_chunks": False, - "v2_default_compressor": { - "numeric": {"id": "zstd", "level": 0, "checksum": False}, - "string": {"id": "zstd", "level": 0, "checksum": False}, - "bytes": {"id": "zstd", "level": 0, "checksum": 
False}, - }, + "v2_default_compressor": {"default": {"id": "zstd", "level": 0, "checksum": False}}, "v2_default_filters": { - "numeric": None, - "string": [{"id": "vlen-utf8"}], - "bytes": [{"id": "vlen-bytes"}], - "raw": None, + "default": None, + "numpy.variable_length_unicode_string": [{"id": "vlen-utf8"}], + "numpy.fixed_length_unicode_string": [{"id": "vlen-utf8"}], + "r*": [{"id": "vlen-bytes"}], }, - "v3_default_filters": {"boolean": [], "numeric": [], "string": [], "bytes": []}, + "v3_default_filters": defaultdict(list), "v3_default_serializer": { - "boolean": {"name": "bytes", "configuration": {"endian": "little"}}, - "numeric": {"name": "bytes", "configuration": {"endian": "little"}}, - "string": {"name": "vlen-utf8"}, - "bytes": {"name": "vlen-bytes"}, + "default": {"name": "bytes", "configuration": {"endian": "little"}}, + "numpy.variable_length_unicode_string": [{"name": "vlen-utf8"}], + "numpy.fixed_length_unicode_string": [{"name": "vlen-utf8"}], + "r*": {"name": "vlen-bytes"}, }, "v3_default_compressors": { - "boolean": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ], - "numeric": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ], - "string": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ], - "bytes": [ + "default": [ {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ], + ] }, }, "async": {"concurrency": 10, "timeout": None}, diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index caf46bb216..d61fedd4ab 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -627,7 +627,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: @dataclass(frozen=True, kw_only=True) class VariableLengthString(DTypeWrapper[np.dtypes.StringDType, str]): dtype_cls = np.dtypes.StringDType - _zarr_v3_name = "numpy.vlen_string" + _zarr_v3_name = "numpy.variable_length_string" @classmethod def 
_from_dtype_unsafe(cls, dtype: np.dtypes.StringDType) -> Self: @@ -658,7 +658,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: @dataclass(frozen=True, kw_only=True) class VariableLengthString(DTypeWrapper[np.dtypes.ObjectDType, str]): dtype_cls = np.dtypes.ObjectDType - _zarr_v3_name = "numpy.vlen_string" + _zarr_v3_name = "numpy.variable_length_string" @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.ObjectDType) -> Self: From 120df57d6aa633c2d290db7013ce94a85c79622d Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Mon, 17 Mar 2025 10:12:38 +0100 Subject: [PATCH 031/130] Update src/zarr/core/_info.py Co-authored-by: Joe Hamman --- src/zarr/core/_info.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index a632b8c602..3a3a3a5714 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -9,8 +9,6 @@ from zarr.core.common import ZarrFormat from zarr.core.dtype.wrapper import DTypeWrapper -# from zarr.core.metadata.v3 import DataType - @dataclasses.dataclass(kw_only=True) class GroupInfo: From 0d9922b5bb71be891764888d223684b8ff8f63e5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 17 Mar 2025 12:20:39 +0100 Subject: [PATCH 032/130] add placeholder disclaimer to v3 data types summary --- docs/user-guide/data_types.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 8fcfaac794..91cbeb1d7f 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -45,7 +45,7 @@ as are floats, with the caveat that `NaN`, positive infinity, and negative infin Data types in Zarr version 3 ---------------------------- - +(note: placeholder text) * Data type names are different -- Zarr V2 represented the 16 bit unsigned integer data type as ``>i2``; Zarr V3 represents the same data type as ``int16``. 
* No endianness * A data type can be encoded in metadata as a string or a ``JSON`` object with the structure ``{"name": , "configuration": {...}}`` From 207595251f6ba9881972411b9943a01c0f7311e8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 17 Mar 2025 12:22:53 +0100 Subject: [PATCH 033/130] make example runnable --- docs/user-guide/data_types.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 91cbeb1d7f..7039d1850a 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -28,8 +28,9 @@ Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``st >>> import zarr >>> import numpy as np >>> import json + >>> store = {} >>> np_dtype = np.dtype('int64') - >>> z = zarr.create_array(shape=(1,), dtype=np_dtype, zarr_format=2) + >>> z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) >>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] >>> assert dtype_meta == np_dtype.str # True >>> dtype_meta From 44369d68b5ca0d647dbff497c297b426cbaa3108 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 17 Mar 2025 12:25:31 +0100 Subject: [PATCH 034/130] placeholder section for adding a custom dtype --- docs/user-guide/data_types.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 7039d1850a..352e967c87 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -181,4 +181,7 @@ And this JSON serialization depends on the Zarr format. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Convert a JSON-serialiazed scalar to a native scalar. This inverts the operation of ``to_json_value``. 
+Using a custom data type +------------------------ +TODO \ No newline at end of file From 4f3381f12d2ed72bdf2a6b4449d6dece5e656989 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 17 Mar 2025 12:38:46 +0100 Subject: [PATCH 035/130] define native data type and native scalar --- docs/user-guide/data_types.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 352e967c87..fffd622209 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -58,21 +58,23 @@ Zarr-Python supports two different Zarr formats, and those two formats specify d data types in Zarr version 2 are encoded as Numpy-compatible strings, while data types in Zarr version 3 are encoded as either strings or ``JSON`` objects, and the Zarr V3 data types don't have any associated endianness information, unlike Zarr V2 data types. -We also want Zarr-Python to support data types beyond what's available in Numpy. So it's crucial that we have a -model of array data types that can adapt to the differences between Zarr V2 and V3 and doesn't over-fit to Numpy. +We aspire for Zarr-Python to eventually be array-library-agnostic. +In the context of data types, this means that we should not design an API that overfits to Numpy's data types. +We will use the term "native data type" to refer to a data type used by any external array library (including Numpy), e.g. ``np.dtypes.Float64DType()``. +We will also use the term "native scalar" or "native scalar type" to refer to a scalar value of a native data type. For example, ``np.float64(0)`` generates a scalar with the data type ``np.dtypes.Float64DType``. -Here are the operations we need to perform on data types in Zarr-Python: +Zarr-Python needs to support the following operations on native data types: * Round-trip native data types to fields in array metadata documents.
For example, the Numpy data type ``np.dtype('>i2')`` should be saved as ``{..., "dtype" : ">i2"}`` in Zarr V2 metadata. In Zarr V3 metadata, the same Numpy data type would be saved as ``{..., "data_type": "int16", "codecs": [..., {"name": "bytes", "configuration": {"endian": "big"}, ...]}`` -* Define a default fill value. This is not mandated by the Zarr specifications, but it's convenient for users +* Associate a default fill value with a native data type. This is not mandated by the Zarr specifications, but it's convenient for users to have a useful default. For numeric types like integers and floats the default can be statically set to 0, but for parametric data types like fixed-length strings the default can only be generated after the data type has been parametrized at runtime. -* Round-trip scalars to the ``fill_value`` field in Zarr V2 and V3 array metadata documents. The Zarr V2 and V3 specifications +* Round-trip native scalars to the ``fill_value`` field in Zarr V2 and V3 array metadata documents. The Zarr V2 and V3 specifications define how scalars of each data type should be stored as JSON in array metadata documents, and in principle each data type can define this encoding separately. 
From c8d76800a7fb5742b8d02f5ba143df620dd66c35 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 17 Mar 2025 14:32:12 +0100 Subject: [PATCH 036/130] update data type names --- src/zarr/core/array.py | 41 ++++++++++++++++++++--------------- src/zarr/core/config.py | 14 +++++------- src/zarr/core/dtype/_numpy.py | 33 +++++++++++++++------------- 3 files changed, 48 insertions(+), 40 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 465a2b6cc8..7e2d65f5bc 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -30,9 +30,6 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.abc.store import Store, set_or_delete from zarr.codecs._v2 import V2Codec -from zarr.codecs.bytes import BytesCodec -from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec -from zarr.codecs.zstd import ZstdCodec from zarr.core._info import ArrayInfo from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArraySpec, parse_array_config from zarr.core.attributes import Attributes @@ -69,8 +66,6 @@ from zarr.core.config import config as zarr_config from zarr.core.dtype import ( DTypeWrapper, - FixedLengthAsciiString, - VariableLengthString, parse_data_type, ) from zarr.core.indexing import ( @@ -4224,21 +4219,29 @@ def _get_default_chunk_encoding_v3( """ Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype. 
""" - filters = () - compressors = (ZstdCodec(level=0, checksum=False),) # TODO: find a registry-style solution for this that isn't bloated # We need to associate specific dtypes with specific encoding schemes - if isinstance(dtype, VariableLengthString): - serializer = VLenUTF8Codec() - elif isinstance(dtype, FixedLengthAsciiString): - serializer = VLenBytesCodec() + if dtype._zarr_v3_name in zarr_config.get("array.v3_default_filters"): + filters = zarr_config.get(f"array.v3_default_filters.{dtype._zarr_v3_name}") else: - if dtype.to_dtype().itemsize == 1: - serializer = BytesCodec(endian=None) - else: - serializer = BytesCodec() - return filters, serializer, compressors + filters = zarr_config.get("array.v3_default_filters.default") + + if dtype._zarr_v3_name in zarr_config.get("array.v3_default_compressors"): + compressors = zarr_config.get(f"array.v3_default_compressors.{dtype._zarr_v3_name}") + else: + compressors = zarr_config.get("array.v3_default_compressors.default") + + if dtype._zarr_v3_name in zarr_config.get("array.v3_default_serializer"): + serializer = zarr_config.get(f"array.v3_default_serializer.{dtype._zarr_v3_name}") + else: + serializer = zarr_config.get("array.v3_default_serializer.default") + + return ( + tuple(_parse_array_array_codec(f) for f in filters), + _parse_array_bytes_codec(serializer), + tuple(_parse_bytes_bytes_codec(c) for c in compressors), + ) def _get_default_chunk_encoding_v2( @@ -4256,7 +4259,11 @@ def _get_default_chunk_encoding_v2( compressor = zarr_config.get(f"array.v2_default_compressor.{dtype._zarr_v3_name}") else: compressor = zarr_config.get("array.v2_default_compressor.default") - return filters, compressor + + if filters is not None: + filters = tuple(numcodecs.get_codec(f) for f in filters) + + return filters, numcodecs.get_codec(compressor) def _parse_chunk_encoding_v2( diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 71c311d7d5..aa4dde049e 100644 --- a/src/zarr/core/config.py +++ 
b/src/zarr/core/config.py @@ -36,8 +36,6 @@ if TYPE_CHECKING: from donfig.config_obj import ConfigSet -from collections import defaultdict - class BadConfigError(ValueError): _msg = "bad Config: %r" @@ -82,15 +80,15 @@ def enable_gpu(self) -> ConfigSet: "v2_default_compressor": {"default": {"id": "zstd", "level": 0, "checksum": False}}, "v2_default_filters": { "default": None, - "numpy.variable_length_unicode_string": [{"id": "vlen-utf8"}], - "numpy.fixed_length_unicode_string": [{"id": "vlen-utf8"}], - "r*": [{"id": "vlen-bytes"}], + "variable_length_utf8": [{"id": "vlen-utf8"}], + "fixed_length_ucs4": [{"id": "vlen-utf8"}], + "fixed_length_ascii": [{"id": "vlen-bytes"}], }, - "v3_default_filters": defaultdict(list), + "v3_default_filters": {"default": ()}, "v3_default_serializer": { "default": {"name": "bytes", "configuration": {"endian": "little"}}, - "numpy.variable_length_unicode_string": [{"name": "vlen-utf8"}], - "numpy.fixed_length_unicode_string": [{"name": "vlen-utf8"}], + "variable_length_utf8": {"name": "vlen-utf8"}, + "fixed_length_ucs4": {"name": "vlen-utf8"}, "r*": {"name": "vlen-bytes"}, }, "v3_default_compressors": { diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index d61fedd4ab..fa97503795 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -496,7 +496,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex1 @dataclass(frozen=True, kw_only=True) class FixedLengthAsciiString(DTypeWrapper[np.dtypes.BytesDType[Any], np.bytes_]): dtype_cls = np.dtypes.BytesDType - _zarr_v3_name = "numpy.static_byte_string" + _zarr_v3_name = "fixed_length_ascii" item_size_bits: ClassVar[int] = 8 length: int = 1 @@ -523,20 +523,20 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: @dataclass(frozen=True, kw_only=True) -class FixedLengthBytes(DTypeWrapper[np.dtypes.VoidDType[Any], np.void]): +class FixedLengthBytes(DTypeWrapper[np.dtypes.VoidDType, 
np.void]): dtype_cls = np.dtypes.VoidDType _zarr_v3_name = "r*" item_size_bits: ClassVar[int] = 8 length: int = 1 @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType[Any]) -> Self: + def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) def default_value(self) -> np.void: return self.cast_value(("\x00" * self.length).encode("ascii")) - def to_dtype(self) -> np.dtypes.VoidDType[Any]: + def to_dtype(self) -> np.dtypes.VoidDType: # Numpy does not allow creating a void type # by invoking np.dtypes.VoidDType directly return np.dtype(f"V{self.length}") @@ -577,7 +577,7 @@ def check_dict(cls, data: dict[str, JSON]) -> TypeGuard[dict[str, JSON]]: isinstance(data, dict) and "name" in data and isinstance(data["name"], str) - and re.match(r"^r\d+$", data["name"]) + and (re.match(r"^r\d+$", data["name"]) is not None) ) def to_json_value(self, data: np.void, *, zarr_format: ZarrFormat) -> str: @@ -592,7 +592,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: @dataclass(frozen=True, kw_only=True) class FixedLengthUnicodeString(DTypeWrapper[np.dtypes.StrDType[int], np.str_]): dtype_cls = np.dtypes.StrDType - _zarr_v3_name = "numpy.fixed_length_unicode_string" + _zarr_v3_name = "fixed_length_ucs4" item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point endianness: Endianness | None = "native" length: int = 1 @@ -605,7 +605,10 @@ def _from_dtype_unsafe(cls, dtype: np.dtypes.StrDType[int]) -> Self: ) def to_dtype(self) -> np.dtypes.StrDType[int]: - return self.dtype_cls(self.length).newbyteorder(endianness_to_numpy_str(self.endianness)) + return cast( + np.dtypes.StrDType[int], + self.dtype_cls(self.length).newbyteorder(endianness_to_numpy_str(self.endianness)), + ) def default_value(self) -> np.str_: return np.str_("") @@ -627,7 +630,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: @dataclass(frozen=True, 
kw_only=True) class VariableLengthString(DTypeWrapper[np.dtypes.StringDType, str]): dtype_cls = np.dtypes.StringDType - _zarr_v3_name = "numpy.variable_length_string" + _zarr_v3_name = "variable_length_utf8" @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.StringDType) -> Self: @@ -658,14 +661,14 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: @dataclass(frozen=True, kw_only=True) class VariableLengthString(DTypeWrapper[np.dtypes.ObjectDType, str]): dtype_cls = np.dtypes.ObjectDType - _zarr_v3_name = "numpy.variable_length_string" + _zarr_v3_name = "variable_length_utf8" @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.ObjectDType) -> Self: return cls() def to_dtype(self) -> np.dtypes.ObjectDType: - return self.dtype_cls() + return cast(np.dtypes.ObjectDType, self.dtype_cls()) def cast_value(self, value: object) -> str: return str(value) @@ -695,7 +698,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: @dataclass(frozen=True, kw_only=True) class DateTime64(DTypeWrapper[np.dtypes.DateTime64DType, np.datetime64]): dtype_cls = np.dtypes.DateTime64DType - _zarr_v3_name = "numpy.datetime64" + _zarr_v3_name = "datetime64" unit: DateUnit | TimeUnit = "s" endianness: Endianness = "native" @@ -713,7 +716,7 @@ def _from_dtype_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: return cls(unit=unit, endianness=endianness_from_numpy_str(dtype.byteorder)) def cast_value(self, value: object) -> np.datetime64: - return self.to_dtype().type(value, self.unit) + return cast(np.datetime64, self.to_dtype().type(value, self.unit)) def to_dtype(self) -> np.dtypes.DateTime64DType: # Numpy does not allow creating datetime64 via @@ -734,14 +737,14 @@ def to_json_value(self, data: np.datetime64, *, zarr_format: ZarrFormat) -> int: @dataclass(frozen=True, kw_only=True) class Structured(DTypeWrapper[np.dtypes.VoidDType, np.void]): dtype_cls = np.dtypes.VoidDType - _zarr_v3_name = "numpy.structured" + _zarr_v3_name = 
"structured" fields: tuple[tuple[str, DTypeWrapper[Any, Any]], ...] def default_value(self) -> np.void: return self.cast_value(0) def cast_value(self, value: object) -> np.void: - return np.array([value], dtype=self.to_dtype())[0] + return cast(np.void, np.array([value], dtype=self.to_dtype())[0]) @classmethod def check_dtype(cls, dtype: np.dtypes.DTypeLike) -> TypeGuard[np.dtypes.VoidDType]: @@ -787,7 +790,7 @@ def to_dict(self) -> dict[str, JSON]: return base_dict @classmethod - def check_dict(cls, data: JSON) -> bool: + def check_dict(cls, data: JSON) -> TypeGuard[JSON]: return ( isinstance(data, dict) and "name" in data From 2a7b5a8cead0dc5c74525248b7a27058846f091b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 17 Mar 2025 16:11:26 +0100 Subject: [PATCH 037/130] fix config test failures --- src/zarr/core/array.py | 10 +++++- src/zarr/core/config.py | 2 +- src/zarr/core/dtype/_numpy.py | 2 +- tests/test_array.py | 19 +++++++--- tests/test_config.py | 67 +++++++++++++++-------------------- 5 files changed, 53 insertions(+), 47 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7e2d65f5bc..0fa25c3695 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -30,6 +30,7 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.abc.store import Store, set_or_delete from zarr.codecs._v2 import V2Codec +from zarr.codecs.bytes import BytesCodec from zarr.core._info import ArrayInfo from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArraySpec, parse_array_config from zarr.core.attributes import Attributes @@ -4231,7 +4232,6 @@ def _get_default_chunk_encoding_v3( compressors = zarr_config.get(f"array.v3_default_compressors.{dtype._zarr_v3_name}") else: compressors = zarr_config.get("array.v3_default_compressors.default") - if dtype._zarr_v3_name in zarr_config.get("array.v3_default_serializer"): serializer = 
zarr_config.get(f"array.v3_default_serializer.{dtype._zarr_v3_name}") else: @@ -4353,6 +4353,14 @@ def _parse_chunk_encoding_v3( out_bytes_bytes = tuple(_parse_bytes_bytes_codec(c) for c in maybe_bytes_bytes) + # specialize codecs as needed given the dtype + + # TODO: refactor so that the config only contains the name of the codec, and we use the dtype + # to create the codec instance, instead of storing a dict representation of a full codec. + + if isinstance(out_array_bytes, BytesCodec) and dtype.to_dtype().itemsize == 1: + # The default endianness in the bytescodec might not be None, so we need to replace it + out_array_bytes = replace(out_array_bytes, endian=None) return out_array_array, out_array_bytes, out_bytes_bytes diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index aa4dde049e..054316fd37 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -84,7 +84,7 @@ def enable_gpu(self) -> ConfigSet: "fixed_length_ucs4": [{"id": "vlen-utf8"}], "fixed_length_ascii": [{"id": "vlen-bytes"}], }, - "v3_default_filters": {"default": ()}, + "v3_default_filters": {"default": []}, "v3_default_serializer": { "default": {"name": "bytes", "configuration": {"endian": "little"}}, "variable_length_utf8": {"name": "vlen-utf8"}, diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index fa97503795..c562f0a593 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -711,7 +711,7 @@ def to_dict(self) -> dict[str, JSON]: @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: unit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] - if unit not in get_args(DateUnit | TimeUnit): + if unit not in get_args(DateUnit) and unit not in get_args(TimeUnit): raise DataTypeValidationError('Invalid unit for "numpy.datetime64"') return cls(unit=unit, endianness=endianness_from_numpy_str(dtype.byteorder)) diff --git a/tests/test_array.py b/tests/test_array.py index 
f8880c86c0..b2f21d6562 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1,4 +1,5 @@ import dataclasses +import inspect import json import math import multiprocessing as mp @@ -28,8 +29,6 @@ from zarr.core.array import ( CompressorsLike, FiltersLike, - _get_default_chunk_encoding_v2, - _get_default_chunk_encoding_v3, _parse_chunk_encoding_v2, _parse_chunk_encoding_v3, chunks_initialized, @@ -1064,13 +1063,23 @@ async def test_default_filters_compressors( shape=(10,), zarr_format=zarr_format, ) + + sig = inspect.signature(create_array) + if zarr_format == 3: - expected_filters, expected_serializer, expected_compressors = ( - _get_default_chunk_encoding_v3(dtype=zdtype) + expected_filters, expected_serializer, expected_compressors = _parse_chunk_encoding_v3( + compressors=sig.parameters["compressors"].default, + filters=sig.parameters["filters"].default, + serializer=sig.parameters["serializer"].default, + dtype=zdtype, ) elif zarr_format == 2: - default_filters, default_compressors = _get_default_chunk_encoding_v2(dtype=zdtype) + default_filters, default_compressors = _parse_chunk_encoding_v2( + compressor=sig.parameters["compressors"].default, + filters=sig.parameters["filters"].default, + dtype=zdtype, + ) if default_filters is None: expected_filters = () else: diff --git a/tests/test_config.py b/tests/test_config.py index 1a2453d646..34ecfdc119 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -19,10 +19,12 @@ GzipCodec, ShardingCodec, ) +from zarr.core.array import create_array from zarr.core.array_spec import ArraySpec from zarr.core.buffer import NDBuffer from zarr.core.codec_pipeline import BatchedCodecPipeline from zarr.core.config import BadConfigError, config +from zarr.core.dtype import get_data_type_from_numpy from zarr.core.indexing import SelectorTuple from zarr.registry import ( fully_qualified_name, @@ -52,33 +54,24 @@ def test_config_defaults_set() -> None: "array": { "order": "C", "write_empty_chunks": False, - 
"v2_default_compressor": { - "numeric": {"id": "zstd", "level": 0, "checksum": False}, - "string": {"id": "zstd", "level": 0, "checksum": False}, - "bytes": {"id": "zstd", "level": 0, "checksum": False}, - }, + "v2_default_compressor": {"default": {"id": "zstd", "level": 0, "checksum": False}}, "v2_default_filters": { - "numeric": None, - "string": [{"id": "vlen-utf8"}], - "bytes": [{"id": "vlen-bytes"}], - "raw": None, + "default": None, + "variable_length_utf8": [{"id": "vlen-utf8"}], + "fixed_length_ucs4": [{"id": "vlen-utf8"}], + "fixed_length_ascii": [{"id": "vlen-bytes"}], }, - "v3_default_filters": {"numeric": [], "string": [], "bytes": []}, + "v3_default_filters": {"default": []}, "v3_default_serializer": { - "numeric": {"name": "bytes", "configuration": {"endian": "little"}}, - "string": {"name": "vlen-utf8"}, - "bytes": {"name": "vlen-bytes"}, + "default": {"name": "bytes", "configuration": {"endian": "little"}}, + "variable_length_utf8": {"name": "vlen-utf8"}, + "fixed_length_ucs4": {"name": "vlen-utf8"}, + "r*": {"name": "vlen-bytes"}, }, "v3_default_compressors": { - "numeric": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ], - "string": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ], - "bytes": [ + "default": [ {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ], + ] }, }, "async": {"concurrency": 10, "timeout": None}, @@ -306,26 +299,22 @@ class NewCodec2(BytesCodec): @pytest.mark.parametrize("dtype", ["int", "bytes", "str"]) async def test_default_codecs(dtype: str) -> None: - with config.set( - { - "array.v3_default_compressors": { # test setting non-standard codecs - "numeric": [ - {"name": "gzip", "configuration": {"level": 5}}, - ], - "string": [ - {"name": "gzip", "configuration": {"level": 5}}, - ], - "bytes": [ - {"name": "gzip", "configuration": {"level": 5}}, - ], - } - } - ): - arr = await zarr.api.asynchronous.create_array( + """ + Test that the default 
compressors are sensitive to the current setting of the config. + """ + zdtype = get_data_type_from_numpy(dtype) + expected_compressors = (GzipCodec(),) + new_conf = { + f"array.v3_default_compressors.{zdtype._zarr_v3_name}": [ + c.to_dict() for c in expected_compressors + ] + } + with config.set(new_conf): + arr = await create_array( shape=(100,), chunks=(100,), - dtype=np.dtype(dtype), + dtype=dtype, zarr_format=3, store=MemoryStore(), ) - assert arr.compressors == (GzipCodec(),) + assert arr.compressors == expected_compressors From e855e54d3ac757af32f231ec26f65d9e38fcb809 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 17 Mar 2025 16:27:10 +0100 Subject: [PATCH 038/130] call to_dtype once in blosc evolve_from_array_spec --- src/zarr/codecs/blosc.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index 79be926ad8..4cee49f56d 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -136,18 +136,14 @@ def to_dict(self) -> dict[str, JSON]: } def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: - dtype = array_spec.dtype + dtype = array_spec.dtype.to_dtype() new_codec = self if new_codec.typesize is None: - new_codec = replace(new_codec, typesize=dtype.to_dtype().itemsize) + new_codec = replace(new_codec, typesize=dtype.itemsize) if new_codec.shuffle is None: new_codec = replace( new_codec, - shuffle=( - BloscShuffle.bitshuffle - if dtype.to_dtype().itemsize == 1 - else BloscShuffle.shuffle - ), + shuffle=(BloscShuffle.bitshuffle if dtype.itemsize == 1 else BloscShuffle.shuffle), ) return new_codec From a2da99add279049ae3827537e5c63f2652cc8aa2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 19 Mar 2025 21:18:47 +0100 Subject: [PATCH 039/130] refactor dtypewrapper -> zdtype --- src/zarr/abc/codec.py | 17 +- src/zarr/api/asynchronous.py | 6 +- src/zarr/codecs/bytes.py | 9 +- src/zarr/codecs/sharding.py | 16 +- 
src/zarr/codecs/transpose.py | 10 +- src/zarr/core/_info.py | 17 +- src/zarr/core/array.py | 85 +- src/zarr/core/array_spec.py | 15 +- src/zarr/core/buffer/cpu.py | 2 +- src/zarr/core/codec_pipeline.py | 9 +- src/zarr/core/common.py | 2 - src/zarr/core/config.py | 10 +- src/zarr/core/dtype/__init__.py | 53 +- src/zarr/core/dtype/_numpy.py | 1190 +++++++++++++++++----- src/zarr/core/dtype/common.py | 71 +- src/zarr/core/dtype/registry.py | 20 +- src/zarr/core/dtype/wrapper.py | 157 ++- src/zarr/core/metadata/v2.py | 44 +- src/zarr/core/metadata/v3.py | 36 +- src/zarr/testing/strategies.py | 4 +- tests/conftest.py | 4 +- tests/test_array.py | 6 +- tests/test_codecs/test_vlen.py | 6 +- tests/test_config.py | 16 +- tests/test_metadata/test_consolidated.py | 2 +- tests/test_metadata/test_dtype.py | 120 ++- tests/test_metadata/test_v3.py | 16 +- 27 files changed, 1312 insertions(+), 631 deletions(-) diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py index 16400f5f4b..31cb44d84e 100644 --- a/src/zarr/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -1,7 +1,7 @@ from __future__ import annotations from abc import abstractmethod -from typing import TYPE_CHECKING, Any, Generic, TypeVar +from typing import TYPE_CHECKING, Generic, TypeVar from zarr.abc.metadata import Metadata from zarr.core.buffer import Buffer, NDBuffer @@ -12,11 +12,10 @@ from collections.abc import Awaitable, Callable, Iterable from typing import Self - import numpy as np - from zarr.abc.store import ByteGetter, ByteSetter from zarr.core.array_spec import ArraySpec from zarr.core.chunk_grids import ChunkGrid + from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar from zarr.core.indexing import SelectorTuple __all__ = [ @@ -93,7 +92,13 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: """ return self - def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + def validate( + self, + *, + shape: ChunkCoords, + dtype: ZDType[_BaseDType, 
_BaseScalar], + chunk_grid: ChunkGrid, + ) -> None: """Validates that the codec configuration is compatible with the array metadata. Raises errors when the codec configuration is not compatible. @@ -285,7 +290,9 @@ def supports_partial_decode(self) -> bool: ... def supports_partial_encode(self) -> bool: ... @abstractmethod - def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + def validate( + self, *, shape: ChunkCoords, dtype: ZDType[_BaseDType, _BaseScalar], chunk_grid: ChunkGrid + ) -> None: """Validates that all codec configurations are compatible with the array metadata. Raises errors when a codec configuration is not compatible. diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index d3e88ae7d3..72a12f9acb 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -28,7 +28,7 @@ _warn_order_kwarg, _warn_write_empty_chunks_kwarg, ) -from zarr.core.dtype import get_data_type_from_numpy +from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.group import ( AsyncGroup, ConsolidatedMetadata, @@ -433,7 +433,7 @@ async def save_array( shape = arr.shape chunks = getattr(arr, "chunks", None) # for array-likes with chunks attribute overwrite = kwargs.pop("overwrite", None) or _infer_overwrite(mode) - zarr_dtype = get_data_type_from_numpy(arr.dtype) + zarr_dtype = get_data_type_from_native_dtype(arr.dtype) new = await AsyncArray._create( store_path, zarr_format=zarr_format, @@ -984,7 +984,7 @@ async def create( _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) or _default_zarr_format() ) - dtype_wrapped = get_data_type_from_numpy(dtype) + dtype_wrapped = get_data_type_from_native_dtype(dtype) if zarr_format == 2: if chunks is None: chunks = shape diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index e7b57ab9b3..c86705c8ea 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -3,20 +3,21 @@ import 
sys from dataclasses import dataclass, replace from enum import Enum -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast import numpy as np from zarr.abc.codec import ArrayBytesCodec from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer from zarr.core.common import JSON, parse_enum, parse_named_configuration -from zarr.core.dtype.common import endianness_to_numpy_str +from zarr.core.dtype._numpy import endianness_to_numpy_str from zarr.registry import register_codec if TYPE_CHECKING: from typing import Self from zarr.core.array_spec import ArraySpec + from zarr.core.dtype.common import Endianness class Endian(Enum): @@ -73,7 +74,9 @@ async def _decode_single( ) -> NDBuffer: assert isinstance(chunk_bytes, Buffer) # TODO: remove endianness enum in favor of literal union - endian_str = self.endian.value if self.endian is not None else None + endian_str = cast( + "Endianness | None", self.endian.value if self.endian is not None else None + ) dtype = chunk_spec.dtype.to_dtype().newbyteorder(endianness_to_numpy_str(endian_str)) as_array_like = chunk_bytes.as_array_like() diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index c501346980..e8a23e20c4 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -43,6 +43,7 @@ parse_shapelike, product, ) +from zarr.core.dtype._numpy import UInt64 from zarr.core.indexing import ( BasicIndexer, SelectorTuple, @@ -58,7 +59,7 @@ from typing import Self from zarr.core.common import JSON - from zarr.core.dtype.wrapper import DTypeWrapper + from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar MAX_UINT_64 = 2**64 - 1 ShardMapping = Mapping[ChunkCoords, Buffer] @@ -405,7 +406,11 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: return self def validate( - self, *, shape: ChunkCoords, dtype: DTypeWrapper[Any, Any], chunk_grid: ChunkGrid + self, + *, + shape: ChunkCoords, + dtype: ZDType[_BaseDType, _BaseScalar], + chunk_grid: ChunkGrid, ) 
-> None: if len(self.chunk_shape) != len(shape): raise ValueError( @@ -443,7 +448,10 @@ async def _decode_single( # setup output array out = chunk_spec.prototype.nd_buffer.create( - shape=shard_shape, dtype=shard_spec.dtype, order=shard_spec.order, fill_value=0 + shape=shard_shape, + dtype=shard_spec.dtype.to_dtype(), + order=shard_spec.order, + fill_value=0, ) shard_dict = await _ShardReader.from_bytes(shard_bytes, self, chunks_per_shard) @@ -685,7 +693,7 @@ def _shard_index_size(self, chunks_per_shard: ChunkCoords) -> int: def _get_index_chunk_spec(self, chunks_per_shard: ChunkCoords) -> ArraySpec: return ArraySpec( shape=chunks_per_shard + (2,), - dtype=np.dtype(" tuple[int, ...]: @@ -45,7 +46,12 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: def to_dict(self) -> dict[str, JSON]: return {"name": "transpose", "configuration": {"order": tuple(self.order)}} - def validate(self, shape: tuple[int, ...], dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + def validate( + self, + shape: tuple[int, ...], + dtype: ZDType[_BaseDType, _BaseScalar], + chunk_grid: ChunkGrid, + ) -> None: if len(self.order) != len(shape): raise ValueError( f"The `order` tuple needs have as many entries as there are dimensions in the array. Got {self.order}." 
diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index 3a3a3a5714..c9637b156a 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -1,13 +1,16 @@ +from __future__ import annotations + import dataclasses import textwrap -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal -import numcodecs.abc -import numpy as np +if TYPE_CHECKING: + import numcodecs.abc + import numpy as np -from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec -from zarr.core.common import ZarrFormat -from zarr.core.dtype.wrapper import DTypeWrapper + from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec + from zarr.core.common import ZarrFormat + from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar @dataclasses.dataclass(kw_only=True) @@ -78,7 +81,7 @@ class ArrayInfo: _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat - _data_type: np.dtype[Any] | DTypeWrapper + _data_type: np.dtype[Any] | ZDType[_BaseDType, _BaseScalar] _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] 
| None = None diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 0fa25c3695..7b6eb455fc 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -41,7 +41,7 @@ default_buffer_prototype, ) from zarr.core.buffer.cpu import buffer_prototype as cpu_buffer_prototype -from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition, normalize_chunks +from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid, _auto_partition, normalize_chunks from zarr.core.chunk_key_encodings import ( ChunkKeyEncoding, ChunkKeyEncodingLike, @@ -66,7 +66,7 @@ ) from zarr.core.config import config as zarr_config from zarr.core.dtype import ( - DTypeWrapper, + ZDType, parse_data_type, ) from zarr.core.indexing import ( @@ -124,6 +124,7 @@ from zarr.abc.codec import CodecPipeline from zarr.codecs.sharding import ShardingCodecIndexLocation + from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar from zarr.core.group import AsyncGroup from zarr.storage import StoreLike @@ -550,7 +551,7 @@ async def _create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike | DTypeWrapper[Any, Any], + dtype: npt.DTypeLike | ZDType[_BaseDType, _BaseScalar], zarr_format: ZarrFormat = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -580,7 +581,7 @@ async def _create( Deprecated in favor of :func:`zarr.api.asynchronous.create_array`. 
""" - dtype_parsed = parse_data_type(dtype) + dtype_parsed = parse_data_type(dtype, zarr_format=zarr_format) store_path = await make_store_path(store) shape = parse_shapelike(shape) @@ -668,7 +669,7 @@ async def _create( @staticmethod def _create_metadata_v3( shape: ShapeLike, - dtype: DTypeWrapper[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], chunk_shape: ChunkCoords, fill_value: Any | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, @@ -693,14 +694,6 @@ def _create_metadata_v3( else: chunk_key_encoding_parsed = chunk_key_encoding - if dtype.to_dtype().kind in ("U", "T", "S"): - warn( - f"The dtype `{dtype}` is currently not part in the Zarr format 3 specification. It " - "may not be supported by other zarr implementations and may change in the future.", - category=UserWarning, - stacklevel=2, - ) - if fill_value is None: # v3 spec will not allow a null fill value fill_value_parsed = dtype.default_value() @@ -725,7 +718,7 @@ async def _create_v3( store_path: StorePath, *, shape: ShapeLike, - dtype: DTypeWrapper[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], chunk_shape: ChunkCoords, config: ArrayConfig, fill_value: Any | None = None, @@ -773,7 +766,7 @@ async def _create_v3( @staticmethod def _create_metadata_v2( shape: ChunkCoords, - dtype: DTypeWrapper[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], chunks: ChunkCoords, order: MemoryOrder, dimension_separator: Literal[".", "/"] | None = None, @@ -803,7 +796,7 @@ async def _create_v2( store_path: StorePath, *, shape: ChunkCoords, - dtype: DTypeWrapper[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], chunks: ChunkCoords, order: MemoryOrder, config: ArrayConfig, @@ -946,7 +939,7 @@ def chunks(self) -> ChunkCoords: return self.metadata.chunks @cached_property - def chunk_grid(self) -> RegularChunkGrid: + def chunk_grid(self) -> ChunkGrid: if self.metadata.zarr_format == 2: return RegularChunkGrid(chunk_shape=self.chunks) else: @@ -1036,7 +1029,17 @@ def compressors(self) -> 
tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec ) @property - def dtype(self) -> np.dtype[Any]: + def _zdtype(self) -> ZDType[_BaseDType, _BaseScalar]: + """ + The zarr-specific representation of the array data type + """ + if self.metadata.zarr_format == 2: + return self.metadata.dtype + else: + return self.metadata.data_type + + @property + def dtype(self) -> _BaseDType: """Returns the data type of the array. Returns @@ -1044,10 +1047,7 @@ def dtype(self) -> np.dtype[Any]: np.dtype Data type of the array """ - if self.metadata.zarr_format == 2: - return self.metadata.dtype.to_dtype() - else: - return self.metadata.data_type.to_dtype() + return self._zdtype.to_dtype() @property def order(self) -> MemoryOrder: @@ -1273,7 +1273,7 @@ def get_chunk_spec( ) return ArraySpec( shape=self.chunk_grid.chunk_shape, - dtype=self.dtype, + dtype=self._zdtype, fill_value=self.metadata.fill_value, config=array_config, prototype=prototype, @@ -3922,7 +3922,7 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation - dtype_wrapped = parse_data_type(dtype) + dtype_wrapped = parse_data_type(dtype, zarr_format=zarr_format) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format @@ -4215,25 +4215,30 @@ def _parse_chunk_key_encoding( def _get_default_chunk_encoding_v3( - dtype: DTypeWrapper[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: """ Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype. """ + # the config will not allow keys to have "." characters in them + # so we will access the config by transforming "." 
to "__" + + dtype_name_conf = dtype._zarr_v3_name.replace(".", "__") + # TODO: find a registry-style solution for this that isn't bloated # We need to associate specific dtypes with specific encoding schemes - if dtype._zarr_v3_name in zarr_config.get("array.v3_default_filters"): - filters = zarr_config.get(f"array.v3_default_filters.{dtype._zarr_v3_name}") + if dtype_name_conf in zarr_config.get("array.v3_default_filters"): + filters = zarr_config.get(f"array.v3_default_filters.{dtype_name_conf}") else: filters = zarr_config.get("array.v3_default_filters.default") - if dtype._zarr_v3_name in zarr_config.get("array.v3_default_compressors"): - compressors = zarr_config.get(f"array.v3_default_compressors.{dtype._zarr_v3_name}") + if dtype_name_conf in zarr_config.get("array.v3_default_compressors"): + compressors = zarr_config.get(f"array.v3_default_compressors.{dtype_name_conf}") else: compressors = zarr_config.get("array.v3_default_compressors.default") - if dtype._zarr_v3_name in zarr_config.get("array.v3_default_serializer"): - serializer = zarr_config.get(f"array.v3_default_serializer.{dtype._zarr_v3_name}") + if dtype_name_conf in zarr_config.get("array.v3_default_serializer"): + serializer = zarr_config.get(f"array.v3_default_serializer.{dtype_name_conf}") else: serializer = zarr_config.get("array.v3_default_serializer.default") @@ -4245,18 +4250,22 @@ def _get_default_chunk_encoding_v3( def _get_default_chunk_encoding_v2( - dtype: DTypeWrapper[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], ) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]: """ Get the default chunk encoding for Zarr format 2 arrays, given a dtype """ - if dtype._zarr_v3_name in zarr_config.get("array.v2_default_filters"): - filters = zarr_config.get(f"array.v2_default_filters.{dtype._zarr_v3_name}") + # the config will not allow keys to have "." characters in them + # so we will access the config by transforming "." 
to "__" + dtype_name_conf = dtype._zarr_v3_name.replace(".", "__") + + if dtype_name_conf in zarr_config.get("array.v2_default_filters"): + filters = zarr_config.get(f"array.v2_default_filters.{dtype_name_conf}") else: filters = zarr_config.get("array.v2_default_filters.default") - if dtype._zarr_v3_name in zarr_config.get("array.v2_default_compressor"): - compressor = zarr_config.get(f"array.v2_default_compressor.{dtype._zarr_v3_name}") + if dtype_name_conf in zarr_config.get("array.v2_default_compressor"): + compressor = zarr_config.get(f"array.v2_default_compressor.{dtype_name_conf}") else: compressor = zarr_config.get("array.v2_default_compressor.default") @@ -4270,7 +4279,7 @@ def _parse_chunk_encoding_v2( *, compressor: CompressorsLike, filters: FiltersLike, - dtype: DTypeWrapper[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], ) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]: """ Generate chunk encoding classes for Zarr format 2 arrays with optional defaults. @@ -4314,7 +4323,7 @@ def _parse_chunk_encoding_v3( compressors: CompressorsLike, filters: FiltersLike, serializer: SerializerLike, - dtype: DTypeWrapper[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: """ Generate chunk encoding classes for v3 arrays with optional defaults. 
diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index f297fafa24..e8e451944f 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -11,16 +11,13 @@ parse_shapelike, ) from zarr.core.config import config as zarr_config -from zarr.core.dtype import parse_data_type if TYPE_CHECKING: from typing import NotRequired - import numpy.typing as npt - from zarr.core.buffer import BufferPrototype from zarr.core.common import ChunkCoords - from zarr.core.dtype.wrapper import DTypeWrapper + from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar class ArrayConfigParams(TypedDict): @@ -66,7 +63,7 @@ def from_dict(cls, data: ArrayConfigParams) -> Self: """ kwargs_out: ArrayConfigParams = {} for f in fields(ArrayConfig): - field_name = cast(Literal["order", "write_empty_chunks"], f.name) + field_name = cast("Literal['order', 'write_empty_chunks']", f.name) if field_name not in data: kwargs_out[field_name] = zarr_config.get(f"array.{field_name}") else: @@ -92,7 +89,7 @@ def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig: @dataclass(frozen=True) class ArraySpec: shape: ChunkCoords - dtype: DTypeWrapper[Any, Any] + dtype: ZDType[_BaseDType, _BaseScalar] fill_value: Any config: ArrayConfig prototype: BufferPrototype @@ -100,18 +97,16 @@ class ArraySpec: def __init__( self, shape: ChunkCoords, - dtype: npt.DTypeLike | DTypeWrapper[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], fill_value: Any, config: ArrayConfig, prototype: BufferPrototype, ) -> None: shape_parsed = parse_shapelike(shape) - dtype_parsed = parse_data_type(dtype) - fill_value_parsed = parse_fill_value(fill_value) object.__setattr__(self, "shape", shape_parsed) - object.__setattr__(self, "dtype", dtype_parsed) + object.__setattr__(self, "dtype", dtype) object.__setattr__(self, "fill_value", fill_value_parsed) object.__setattr__(self, "config", config) object.__setattr__(self, "prototype", prototype) diff --git a/src/zarr/core/buffer/cpu.py 
b/src/zarr/core/buffer/cpu.py index 9894fced51..225adb6f5c 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -150,7 +150,7 @@ def create( cls, *, shape: Iterable[int], - dtype: np.dtype[Any], + dtype: npt.DTypeLike, order: Literal["C", "F"] = "C", fill_value: Any | None = None, ) -> Self: diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index 222e97ce74..71600fee90 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -23,12 +23,11 @@ from collections.abc import Iterable, Iterator from typing import Self - import numpy as np - from zarr.abc.store import ByteGetter, ByteSetter from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer from zarr.core.chunk_grids import ChunkGrid + from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar T = TypeVar("T") U = TypeVar("U") @@ -133,7 +132,9 @@ def __iter__(self) -> Iterator[Codec]: yield self.array_bytes_codec yield from self.bytes_bytes_codecs - def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: ChunkGrid) -> None: + def validate( + self, *, shape: ChunkCoords, dtype: ZDType[_BaseDType, _BaseScalar], chunk_grid: ChunkGrid + ) -> None: for codec in self: codec.validate(shape=shape, dtype=dtype, chunk_grid=chunk_grid) @@ -295,7 +296,7 @@ def _merge_chunk_array( is_complete_chunk: bool, drop_axes: tuple[int, ...], ) -> NDBuffer: - if chunk_selection == () or is_scalar(value.as_ndarray_like(), chunk_spec.dtype): + if chunk_selection == () or is_scalar(value.as_ndarray_like(), chunk_spec.dtype.to_dtype()): chunk_value = value else: chunk_value = value[out_selection] diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index d06236f793..4cb59f7a87 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -16,8 +16,6 @@ overload, ) -import numpy as np - from zarr.core.config import config as zarr_config if TYPE_CHECKING: diff --git 
a/src/zarr/core/config.py b/src/zarr/core/config.py index 054316fd37..8f87910daa 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -80,15 +80,15 @@ def enable_gpu(self) -> ConfigSet: "v2_default_compressor": {"default": {"id": "zstd", "level": 0, "checksum": False}}, "v2_default_filters": { "default": None, - "variable_length_utf8": [{"id": "vlen-utf8"}], - "fixed_length_ucs4": [{"id": "vlen-utf8"}], - "fixed_length_ascii": [{"id": "vlen-bytes"}], + "numpy__variable_length_utf8": [{"id": "vlen-utf8"}], + "numpy__fixed_length_ucs4": [{"id": "vlen-utf8"}], + "numpy__fixed_length_ascii": [{"id": "vlen-bytes"}], }, "v3_default_filters": {"default": []}, "v3_default_serializer": { "default": {"name": "bytes", "configuration": {"endian": "little"}}, - "variable_length_utf8": {"name": "vlen-utf8"}, - "fixed_length_ucs4": {"name": "vlen-utf8"}, + "numpy__variable_length_utf8": {"name": "vlen-utf8"}, + "numpy__fixed_length_ucs4": {"name": "vlen-utf8"}, "r*": {"name": "vlen-bytes"}, }, "v3_default_compressors": { diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 432eabf2ce..4e594f8796 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -4,21 +4,23 @@ import numpy as np -from zarr.core.dtype.common import _NUMPY_SUPPORTS_VLEN_STRING +from zarr.core.dtype._numpy import _NUMPY_SUPPORTS_VLEN_STRING +from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar if TYPE_CHECKING: import numpy.typing as npt - from zarr.core.common import JSON + from zarr.core.common import JSON, ZarrFormat + from zarr.core.dtype._numpy import ( Bool, Complex64, Complex128, DateTime64, - FixedLengthAsciiString, + FixedLengthAscii, FixedLengthBytes, - FixedLengthUnicodeString, + FixedLengthUnicode, Float16, Float32, Float64, @@ -34,16 +36,15 @@ VariableLengthString, ) from zarr.core.dtype.registry import DataTypeRegistry -from zarr.core.dtype.wrapper import DTypeWrapper +from zarr.core.dtype.wrapper import ZDType 
__all__ = [ "Complex64", "Complex128", - "DTypeWrapper", "DateTime64", - "FixedLengthAsciiString", + "FixedLengthAscii", "FixedLengthBytes", - "FixedLengthUnicodeString", + "FixedLengthUnicode", "Float16", "Float32", "Float64", @@ -57,6 +58,7 @@ "UInt32", "UInt64", "VariableLengthString", + "ZDType", "data_type_registry", "parse_data_type", ] @@ -66,7 +68,7 @@ INTEGER_DTYPE = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 FLOAT_DTYPE = Float16 | Float32 | Float64 COMPLEX_DTYPE = Complex64 | Complex128 -STRING_DTYPE = FixedLengthUnicodeString | VariableLengthString | FixedLengthAsciiString +STRING_DTYPE = FixedLengthUnicode | VariableLengthString | FixedLengthAscii DTYPE = ( Bool | INTEGER_DTYPE @@ -82,34 +84,39 @@ data_type_registry.register(dtype._zarr_v3_name, dtype) -def get_data_type_from_numpy(dtype: npt.DTypeLike) -> DTypeWrapper[Any, Any]: +def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[_BaseDType, _BaseScalar]: data_type_registry.lazy_load() if not isinstance(dtype, np.dtype): if dtype in (str, "str"): if _NUMPY_SUPPORTS_VLEN_STRING: - np_dtype = np.dtype("T") + na_dtype = np.dtype("T") else: - np_dtype = np.dtype("O") + na_dtype = np.dtype("O") elif isinstance(dtype, list): # this is a valid _VoidDTypeLike check - np_dtype = np.dtype([tuple(d) for d in dtype]) + na_dtype = np.dtype([tuple(d) for d in dtype]) else: - np_dtype = np.dtype(dtype) + na_dtype = np.dtype(dtype) else: - np_dtype = dtype - return data_type_registry.match_dtype(np_dtype) + na_dtype = dtype + return data_type_registry.match_dtype(na_dtype) -def get_data_type_from_dict(dtype: dict[str, JSON]) -> DTypeWrapper[Any, Any]: - return data_type_registry.match_json(dtype) +def get_data_type_from_json( + dtype: JSON, zarr_format: ZarrFormat +) -> ZDType[_BaseDType, _BaseScalar]: + return data_type_registry.match_json(dtype, zarr_format=zarr_format) def parse_data_type( - dtype: npt.DTypeLike | DTypeWrapper[Any, Any] | dict[str, JSON], -) -> 
DTypeWrapper[Any, Any]: - if isinstance(dtype, DTypeWrapper): + dtype: npt.DTypeLike | ZDType[Any, Any] | dict[str, JSON], zarr_format: ZarrFormat +) -> ZDType[Any, Any]: + if isinstance(dtype, ZDType): return dtype elif isinstance(dtype, dict): - return get_data_type_from_dict(dtype) + # This branch assumes that the data type has been specified in the JSON form + # but it's also possible for numpy data types to be specified as dictionaries, which will + # cause an error in the `get_data_type_from_json`, but that's ok for now + return get_data_type_from_json(dtype, zarr_format=zarr_format) # type: ignore[arg-type] else: - return get_data_type_from_numpy(dtype) + return get_data_type_from_native_dtype(dtype) diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index c562f0a593..a8bd2b5951 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -2,13 +2,22 @@ import base64 import re +from collections.abc import Sequence from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, ClassVar, Literal, Self, TypeGuard, cast, get_args +from typing import ( + TYPE_CHECKING, + Any, + ClassVar, + Literal, + Self, + TypeGuard, + cast, + get_args, +) import numpy as np from zarr.core.dtype.common import ( - _NUMPY_SUPPORTS_VLEN_STRING, DataTypeValidationError, Endianness, JSONFloat, @@ -16,27 +25,26 @@ bytes_to_json, check_json_bool, check_json_complex_float, - check_json_complex_float_v3, - check_json_float_v2, + check_json_float, check_json_int, check_json_str, complex_from_json, complex_to_json, datetime_from_json, datetime_to_json, - endianness_from_numpy_str, - endianness_to_numpy_str, float_from_json, float_to_json, ) -from zarr.core.dtype.wrapper import DTypeWrapper, TDType +from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat +EndiannessNumpy = Literal[">", "<", "=", "|"] + @dataclass(frozen=True, kw_only=True) -class 
Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): +class Bool(ZDType[np.dtypes.BoolDType, np.bool_]): """ Wrapper for numpy boolean dtype. @@ -49,10 +57,37 @@ class Bool(DTypeWrapper[np.dtypes.BoolDType, np.bool_]): """ _zarr_v3_name = "bool" - dtype_cls: ClassVar[type[np.dtypes.BoolDType]] = np.dtypes.BoolDType + _zarr_v2_names: ClassVar[tuple[str,...]] = ("|b1",) + dtype_cls = np.dtypes.BoolDType + + @classmethod + def _from_dtype_unsafe(cls, dtype: np.dtypes.BoolDType) -> Self: + return cls() + + def to_dtype(self: Self) -> np.dtypes.BoolDType: + return self.dtype_cls() + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["bool", "|b1"]]: + """ + Check that the input is a valid JSON representation of a bool. + """ + if zarr_format == 2: + return data in cls._zarr_v2_names + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + def to_json(self, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() def default_value(self) -> np.bool_: """ @@ -65,26 +100,6 @@ def default_value(self) -> np.bool_: """ return np.False_ - @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.BoolDType) -> Self: - """ - Wrap a numpy boolean dtype without checking. - - Parameters - ---------- - dtype : np.dtypes.BoolDType - The numpy dtype to wrap. - - Returns - ------- - Self - The wrapped dtype. - """ - return cls() - - def to_dtype(self) -> np.dtypes.BoolDType: - return self.dtype_cls() - def to_json_value(self, data: np.bool_, zarr_format: ZarrFormat) -> bool: """ Convert a boolean value to JSON-serializable format. 
@@ -120,337 +135,730 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: The numpy boolean scalar. """ if check_json_bool(data): - return self.cast_value(data) + return np.bool_(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") @dataclass(frozen=True, kw_only=True) -class Int8(DTypeWrapper[np.dtypes.Int8DType, np.int8]): +class Int8(ZDType[np.dtypes.Int8DType, np.int8]): dtype_cls = np.dtypes.Int8DType _zarr_v3_name = "int8" + _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|i1",) @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.Int8DType) -> Self: return cls() - def to_dtype(self) -> np.dtypes.Int8DType: + def to_dtype(self: Self) -> np.dtypes.Int8DType: return self.dtype_cls() - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["int8", "|i1"]]: + """ + Check that the input is a valid JSON representation of a 8-bit integer. + """ + if zarr_format == 2: + return data in cls._zarr_v2_names + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + def to_json(self, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() def default_value(self) -> np.int8: - return self.to_dtype().type(0) + """ + Get the default value. + + Returns + ------- + np.int8 + The default value. + """ + return np.int8(0) def to_json_value(self, data: np.int8, zarr_format: ZarrFormat) -> int: + """ + Convert a numpy 8-bit int to JSON-serializable format. + + Parameters + ---------- + data : np.int8 + The value to convert. + zarr_format : ZarrFormat + The zarr format version. 
+ + Returns + ------- + int + The JSON-serializable form of the scalar. + """ return int(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int8: + """ + Read a JSON-serializable value as a numpy int8 scalar. + + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + np.bool_ + The numpy boolean scalar. + """ if check_json_int(data): - return self.cast_value(data) + return np.int8(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @dataclass(frozen=True, kw_only=True) -class UInt8(DTypeWrapper[np.dtypes.UInt8DType, np.uint8]): +class UInt8(ZDType[np.dtypes.UInt8DType, np.uint8]): dtype_cls = np.dtypes.UInt8DType _zarr_v3_name = "uint8" + _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|u1",) @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.UInt8DType) -> Self: return cls() - def to_dtype(self) -> np.dtypes.UInt8DType: + def to_dtype(self: Self) -> np.dtypes.UInt8DType: return self.dtype_cls() - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["uint8", "|u1"]]: + """ + Check that the input is a valid JSON representation of an unsigned 8-bit integer. + """ + if zarr_format == 2: + return data in cls._zarr_v2_names + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + def to_json(self, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() def default_value(self) -> np.uint8: - return self.to_dtype().type(0) + """ + Get the default value for this data type. 
+ + Returns + ------- + np.uint8 + The default value. + """ + return np.uint8(0) def to_json_value(self, data: np.uint8, zarr_format: ZarrFormat) -> int: + """ + Convert a numpy unsigned 8-bit integer to JSON-serializable format. + + Parameters + ---------- + data : np.uint8 + The value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + int + The JSON-serializable form of the scalar. + """ return int(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint8: + """ + Read a JSON-serializable value as a numpy boolean scalar. + + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + np.bool_ + The numpy boolean scalar. + """ if check_json_int(data): - return self.cast_value(data) + return np.uint8(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @dataclass(frozen=True, kw_only=True) -class Int16(DTypeWrapper[np.dtypes.Int16DType, np.int16]): +class Int16(ZDType[np.dtypes.Int16DType, np.int16]): dtype_cls = np.dtypes.Int16DType _zarr_v3_name = "int16" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i2", " Self: - return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.Int16DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["int16", ">i2", " str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got 
{zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.int16: - return self.cast_value(0) + return self.to_dtype().type(0) def to_json_value(self, data: np.int16, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int16: if check_json_int(data): - return self.cast_value(data) + return self.to_dtype().type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @dataclass(frozen=True, kw_only=True) -class UInt16(DTypeWrapper[np.dtypes.UInt16DType, np.uint16]): +class UInt16(ZDType[np.dtypes.UInt16DType, np.uint16]): dtype_cls = np.dtypes.UInt16DType _zarr_v3_name = "uint16" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u2", " Self: - return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.UInt16DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["uint16", ">u2", " dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + def to_json(self, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # 
type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.uint16: - return self.cast_value(0) + return self.to_dtype().type(0) def to_json_value(self, data: np.uint16, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint16: if check_json_int(data): - return self.cast_value(data) + return self.to_dtype().type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @dataclass(frozen=True, kw_only=True) -class Int32(DTypeWrapper[np.dtypes.Int32DType, np.int32]): +class Int32(ZDType[np.dtypes.Int32DType, np.int32]): dtype_cls = np.dtypes.Int32DType _zarr_v3_name = "int32" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", " Self: - return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.Int32DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["int32", ">i4", " dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + def to_json(self, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.int32: - return 
self.cast_value(0) + return self.to_dtype().type(0) def to_json_value(self, data: np.int32, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int32: if check_json_int(data): - return self.cast_value(data) + return self.to_dtype().type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @dataclass(frozen=True, kw_only=True) -class UInt32(DTypeWrapper[np.dtypes.UInt32DType, np.uint32]): +class UInt32(ZDType[np.dtypes.UInt32DType, np.uint32]): dtype_cls = np.dtypes.UInt32DType _zarr_v3_name = "uint32" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u4", " Self: - return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.UInt32DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["uint32", ">u4", " str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.uint32: - return self.cast_value(0) + return self.to_dtype().type(0) def to_json_value(self, data: np.uint32, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value(self, data: JSON, *, zarr_format: 
ZarrFormat) -> np.uint32: if check_json_int(data): - return self.cast_value(data) + return self.to_dtype().type(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") @dataclass(frozen=True, kw_only=True) -class Int64(DTypeWrapper[np.dtypes.Int64DType, np.int64]): +class Int64(ZDType[np.dtypes.Int64DType, np.int64]): dtype_cls = np.dtypes.Int64DType _zarr_v3_name = "int64" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i8", " Self: - return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.Int64DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["int64", ">i8", " str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.int64: - return self.cast_value(0) + return self.to_dtype().type(0) def to_json_value(self, data: np.int64, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int64: if check_json_int(data): - return self.cast_value(data) + return self.to_dtype().type(data) raise TypeError(f"Invalid type: {data}. 
Expected an integer.") @dataclass(frozen=True, kw_only=True) -class UInt64(DTypeWrapper[np.dtypes.UInt64DType, np.uint64]): +class UInt64(ZDType[np.dtypes.UInt64DType, np.uint64]): dtype_cls = np.dtypes.UInt64DType _zarr_v3_name = "uint64" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u8", " Self: - return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.UInt64DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["uint64", ">u8", " str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.uint64: - return self.cast_value(0) + return self.to_dtype().type(0) def to_json_value(self, data: np.uint64, zarr_format: ZarrFormat) -> int: return int(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint64: if check_json_int(data): - return self.cast_value(data) + return self.to_dtype().type(data) raise TypeError(f"Invalid type: {data}. 
Expected an integer.") @dataclass(frozen=True, kw_only=True) -class Float16(DTypeWrapper[np.dtypes.Float16DType, np.float16]): +class Float16(ZDType[np.dtypes.Float16DType, np.float16]): dtype_cls = np.dtypes.Float16DType _zarr_v3_name = "float16" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f2", " Self: - return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.Float16DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["float", ">f2", " str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.float16: - return self.to_dtype().type(0.0) + return self.to_dtype().type(0) def to_json_value(self, data: np.float16, zarr_format: ZarrFormat) -> JSONFloat: return float_to_json(data, zarr_format) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float16: - if check_json_float_v2(data): + if check_json_float(data, zarr_format=zarr_format): return self.to_dtype().type(float_from_json(data, zarr_format)) raise TypeError(f"Invalid type: {data}. 
Expected a float.") @dataclass(frozen=True, kw_only=True) -class Float32(DTypeWrapper[np.dtypes.Float32DType, np.float32]): +class Float32(ZDType[np.dtypes.Float32DType, np.float32]): dtype_cls = np.dtypes.Float32DType _zarr_v3_name = "float32" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f4", " Self: - return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.Float32DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["float32", ">f4", " np.float32: - return self.to_dtype().type(value) + def to_json(self, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.float32: - return self.to_dtype().type(0.0) + return self.to_dtype().type(0) def to_json_value(self, data: np.float32, zarr_format: ZarrFormat) -> JSONFloat: return float_to_json(data, zarr_format) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float32: - if check_json_float_v2(data): + if check_json_float(data, zarr_format=zarr_format): return self.to_dtype().type(float_from_json(data, zarr_format)) raise TypeError(f"Invalid type: 
{data}. Expected a float.") @dataclass(frozen=True, kw_only=True) -class Float64(DTypeWrapper[np.dtypes.Float64DType, np.float64]): +class Float64(ZDType[np.dtypes.Float64DType, np.float64]): dtype_cls = np.dtypes.Float64DType _zarr_v3_name = "float64" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f8", " Self: - return cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.Float64DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["float64", ">f8", " dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + def to_json(self, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.float64: - return self.to_dtype().type(0.0) + return self.to_dtype().type(0) def to_json_value(self, data: np.float64, zarr_format: ZarrFormat) -> JSONFloat: return float_to_json(data, zarr_format) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float64: - if check_json_float_v2(data): + if check_json_float(data, zarr_format=zarr_format): return self.to_dtype().type(float_from_json(data, zarr_format)) raise TypeError(f"Invalid type: {data}. 
Expected a float.") @dataclass(frozen=True, kw_only=True) -class Complex64(DTypeWrapper[np.dtypes.Complex64DType, np.complex64]): +class Complex64(ZDType[np.dtypes.Complex64DType, np.complex64]): dtype_cls = np.dtypes.Complex64DType _zarr_v3_name = "complex64" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c8", " Self: - return cls() + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.Complex64DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["complex64", ">c8", " str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.complex64: - return np.complex64(0.0) + return self.to_dtype().type(0) def to_json_value( self, data: np.complex64, zarr_format: ZarrFormat @@ -464,23 +872,51 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex6 @dataclass(frozen=True, kw_only=True) -class Complex128(DTypeWrapper[np.dtypes.Complex128DType, np.complex128]): +class Complex128(ZDType[np.dtypes.Complex128DType, np.complex128]): dtype_cls = np.dtypes.Complex128DType _zarr_v3_name = "complex128" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c16", " Self: - return 
cls(endianness=endianness_from_numpy_str(dtype.byteorder)) + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) def to_dtype(self) -> np.dtypes.Complex128DType: - return self.dtype_cls().newbyteorder(endianness_to_numpy_str(self.endianness)) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[Literal["complex128", ">c16", " str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.complex128: - return np.complex128(0.0) + return self.to_dtype().type(0) def to_json_value( self, data: np.complex128, zarr_format: ZarrFormat @@ -488,31 +924,66 @@ def to_json_value( return complex_to_json(data, zarr_format) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex128: - if check_json_complex_float_v3(data): + if check_json_complex_float(data, zarr_format=zarr_format): return complex_from_json(data, dtype=self.to_dtype(), zarr_format=zarr_format) raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") @dataclass(frozen=True, kw_only=True) -class FixedLengthAsciiString(DTypeWrapper[np.dtypes.BytesDType[Any], np.bytes_]): +class FixedLengthAscii(ZDType[np.dtypes.BytesDType[int], np.bytes_]): dtype_cls = np.dtypes.BytesDType - _zarr_v3_name = "fixed_length_ascii" + _zarr_v3_name = "numpy.fixed_length_ascii" item_size_bits: ClassVar[int] = 8 length: int = 1 @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.BytesDType) -> Self: + def _from_dtype_unsafe(cls, dtype: np.dtypes.BytesDType[int]) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) - def to_dtype(self) -> np.dtypes.BytesDType: + def to_dtype(self) -> np.dtypes.BytesDType[int]: return self.dtype_cls(self.length) + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy S dtype. + """ + if zarr_format == 2: + # match |S1, |S2, etc + return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and data["name"] == cls._zarr_v3_name + and "configuration" in data + and isinstance(data["configuration"], dict) + and "length_bits" in data["configuration"] + and isinstance(data["configuration"]["length_bits"], int) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return { + "name": self._zarr_v3_name, + "configuration": {"length_bits": self.length * self.item_size_bits}, + } + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=data["configuration"]["length_bits"] // 
cls.item_size_bits) # type: ignore[arg-type, index, call-overload, operator] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + def default_value(self) -> np.bytes_: return np.bytes_(b"") - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3), "configuration": {"length": self.length}} - def to_json_value(self, data: np.bytes_, *, zarr_format: ZarrFormat) -> str: return base64.standard_b64encode(data).decode("ascii") @@ -523,38 +994,61 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: @dataclass(frozen=True, kw_only=True) -class FixedLengthBytes(DTypeWrapper[np.dtypes.VoidDType, np.void]): - dtype_cls = np.dtypes.VoidDType - _zarr_v3_name = "r*" +class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void]): + # np.dtypes.VoidDType is specified in an odd way in numpy + # it cannot be used to create instances of the dtype + # so we have to tell mypy to ignore this here + dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] + _zarr_v3_name = "numpy.void" item_size_bits: ClassVar[int] = 8 length: int = 1 @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType) -> Self: + def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType[int]) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) - def default_value(self) -> np.void: - return self.cast_value(("\x00" * self.length).encode("ascii")) - - def to_dtype(self) -> np.dtypes.VoidDType: + def to_dtype(self) -> np.dtypes.VoidDType[int]: # Numpy does not allow creating a void type # by invoking np.dtypes.VoidDType directly - return np.dtype(f"V{self.length}") + return cast("np.dtypes.VoidDType[int]", np.dtype(f"V{self.length}")) - def get_name(self, zarr_format: ZarrFormat) -> str: + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: if zarr_format == 2: - return super().get_name(zarr_format=zarr_format) - # note that we don't return self._zarr_v3_name - # 
because the name is parametrized by the length - return f"r{self.length * self.item_size_bits}" + # Check that the dtype is |V1, |V2, ... + return isinstance(data, str) and re.match(r"^\|V\d+$", data) is not None + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and isinstance(data["name"], str) + and (re.match(r"^r\d+$", data["name"]) is not None) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return {"name": f"r{self.length * self.item_size_bits}"} + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=int(data["name"][1:]) // cls.item_size_bits) # type: ignore[arg-type, index, call-overload] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") @classmethod - def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[np.dtypes.VoidDType[Any]]: + def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[Any]]: """ - Reject structured dtypes by ensuring that dtype.fields is None + Numpy void dtype comes in two forms: + * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. + * If the ``fields`` attribute is not ``None``, then the dtype represents a structured dtype, + + In this check we ensure that ``fields`` is ``None``. Parameters ---------- @@ -566,19 +1060,10 @@ def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[np.dtypes.VoidDType Bool True if the dtype matches, False otherwise. 
""" - return super().check_dtype(dtype) and dtype.fields is None + return cls.dtype_cls is type(dtype) and dtype.fields is None # type: ignore[has-type] - @classmethod - def check_dict(cls, data: dict[str, JSON]) -> TypeGuard[dict[str, JSON]]: - # Overriding the base class implementation because the r* dtype - # does not have a name that will can appear in array metadata - # Instead, array metadata will contain names like "r8", "r16", etc - return ( - isinstance(data, dict) - and "name" in data - and isinstance(data["name"], str) - and (re.match(r"^r\d+$", data["name"]) is not None) - ) + def default_value(self) -> np.void: + return self.to_dtype().type(("\x00" * self.length).encode("ascii")) def to_json_value(self, data: np.void, *, zarr_format: ZarrFormat) -> str: return base64.standard_b64encode(data.tobytes()).decode("ascii") @@ -590,63 +1075,123 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: @dataclass(frozen=True, kw_only=True) -class FixedLengthUnicodeString(DTypeWrapper[np.dtypes.StrDType[int], np.str_]): +class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_]): dtype_cls = np.dtypes.StrDType - _zarr_v3_name = "fixed_length_ucs4" + _zarr_v3_name = "numpy.fixed_length_ucs4" item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point endianness: Endianness | None = "native" length: int = 1 @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.StrDType[int]) -> Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls( length=dtype.itemsize // (cls.item_size_bits // 8), - endianness=endianness_from_numpy_str(dtype.byteorder), + endianness=endianness_from_numpy_str(byte_order), ) def to_dtype(self) -> np.dtypes.StrDType[int]: - return cast( - np.dtypes.StrDType[int], - self.dtype_cls(self.length).newbyteorder(endianness_to_numpy_str(self.endianness)), - ) + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls(self.length).newbyteorder(byte_order) + + @classmethod + 
def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy S dtype. + """ + if zarr_format == 2: + # match >U1, <]U\d+$", data) is not None + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and data["name"] == cls._zarr_v3_name + and "configuration" in data + and isinstance(data["configuration"], dict) + and "length_bits" in data["configuration"] + and isinstance(data["configuration"]["length_bits"], int) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return { + "name": self._zarr_v3_name, + "configuration": {"length_bits": self.length * self.item_size_bits}, + } + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=data["configuration"]["length_bits"] // cls.item_size_bits) # type: ignore[arg-type, index, call-overload, operator] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") def default_value(self) -> np.str_: return np.str_("") - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3), "configuration": {"length": self.length}} - def to_json_value(self, data: np.str_, *, zarr_format: ZarrFormat) -> str: return str(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: if not check_json_str(data): raise TypeError(f"Invalid type: {data}. 
Expected a string.") - return self.cast_value(data) + return self.to_dtype().type(data) + + +_NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") if _NUMPY_SUPPORTS_VLEN_STRING: @dataclass(frozen=True, kw_only=True) - class VariableLengthString(DTypeWrapper[np.dtypes.StringDType, str]): + class VariableLengthString(ZDType[np.dtypes.StringDType, str]): # type: ignore[type-var] dtype_cls = np.dtypes.StringDType - _zarr_v3_name = "variable_length_utf8" + _zarr_v3_name = "numpy.variable_length_utf8" @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.StringDType) -> Self: return cls() - def default_value(self) -> str: - return "" + def to_dtype(self) -> np.dtypes.StringDType: + return self.dtype_cls() - def cast_value(self, value: object) -> str: - return str(value) + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy string dtype. + """ + if zarr_format == 2: + # TODO: take the entire metadata document in here, and + # check the compressors / filters for vlen-utf8 + # Note that we are checking for the object dtype name. + return data == "|O" + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + # Note: unlike many other numpy data types, we don't serialize the .str attribute + # of the data type to JSON. 
This is because Zarr was using `|O` for strings before the + # numpy variable length string data type existed, and we want to be consistent with + # that practice + return "|O" + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() - def to_dtype(self) -> np.dtypes.StringDType: - return self.dtype_cls() + def default_value(self) -> str: + return "" def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: return str(data) @@ -654,37 +1199,55 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: if not check_json_str(data): raise TypeError(f"Invalid type: {data}. Expected a string.") - return self.cast_value(data) + return data else: @dataclass(frozen=True, kw_only=True) - class VariableLengthString(DTypeWrapper[np.dtypes.ObjectDType, str]): + class VariableLengthString(ZDType[np.dtypes.ObjectDType, str]): # type: ignore[no-redef] dtype_cls = np.dtypes.ObjectDType - _zarr_v3_name = "variable_length_utf8" + _zarr_v3_name = "numpy.variable_length_utf8" @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.ObjectDType) -> Self: return cls() def to_dtype(self) -> np.dtypes.ObjectDType: - return cast(np.dtypes.ObjectDType, self.dtype_cls()) + return self.dtype_cls() - def cast_value(self, value: object) -> str: - return str(value) + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy O dtype. 
+ """ + if zarr_format == 2: + # TODO: take the entire metadata document in here, and + # check the compressors / filters for vlen-utf8 + return data == "|O" + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() def default_value(self) -> str: return "" - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3)} - def to_json_value(self, data: str, *, zarr_format: ZarrFormat) -> str: return data def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: """ - String literals pass through + Strings pass through """ if not check_json_str(data): raise TypeError(f"Invalid type: {data}. 
Expected a string.") @@ -696,35 +1259,72 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: @dataclass(frozen=True, kw_only=True) -class DateTime64(DTypeWrapper[np.dtypes.DateTime64DType, np.datetime64]): - dtype_cls = np.dtypes.DateTime64DType - _zarr_v3_name = "datetime64" +class DateTime64(ZDType[np.dtypes.DateTime64DType, np.datetime64]): + dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] + _zarr_v3_name = "numpy.datetime64" unit: DateUnit | TimeUnit = "s" - endianness: Endianness = "native" - - def default_value(self) -> np.datetime64: - return np.datetime64("NaT") - - def to_dict(self) -> dict[str, JSON]: - return {"name": self.get_name(zarr_format=3), "configuration": {"unit": self.unit}} + endianness: Endianness | None = "native" @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: - unit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] + unit: DateUnit | TimeUnit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] # type: ignore[assignment] if unit not in get_args(DateUnit) and unit not in get_args(TimeUnit): raise DataTypeValidationError('Invalid unit for "numpy.datetime64"') - return cls(unit=unit, endianness=endianness_from_numpy_str(dtype.byteorder)) - - def cast_value(self, value: object) -> np.datetime64: - return cast(np.datetime64, self.to_dtype().type(value, self.unit)) + byteorder = cast("EndiannessNumpy", dtype.byteorder) + return cls(unit=unit, endianness=endianness_from_numpy_str(byteorder)) def to_dtype(self) -> np.dtypes.DateTime64DType: # Numpy does not allow creating datetime64 via # np.dtypes.DateTime64Dtype() - return np.dtype(f"datetime64[{self.unit}]").newbyteorder( - endianness_to_numpy_str(self.endianness) + return cast( + "np.dtypes.DateTime64DType", + np.dtype(f"datetime64[{self.unit}]").newbyteorder( + endianness_to_numpy_str(self.endianness) + ), ) + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> 
TypeGuard[JSON]: + if zarr_format == 2: + # match M[M], etc + # consider making this a standalone function + return ( + isinstance(data, str) + and len(data) in (6, 7) + and data[0] in (">", "<") + and data[1:4] == "M8[" + and data[4:-1] in get_args(TimeUnit) + get_args(DateUnit) + and data[-1] == "]" + ) + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and data["name"] == cls._zarr_v3_name + and "configuration" in data + and "unit" in data["configuration"] + and data["configuration"]["unit"] in get_args(DateUnit) + get_args(TimeUnit) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + def default_value(self) -> np.datetime64: + return np.datetime64("NaT") + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return {"name": self._zarr_v3_name, "configuration": {"unit": self.unit}} + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(unit=data["configuration"]["unit"]) # type: ignore[arg-type, index, call-overload] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: if check_json_int(data): return datetime_from_json(data, self.unit) @@ -735,19 +1335,19 @@ def to_json_value(self, data: np.datetime64, *, zarr_format: ZarrFormat) -> int: @dataclass(frozen=True, kw_only=True) -class Structured(DTypeWrapper[np.dtypes.VoidDType, np.void]): - dtype_cls = np.dtypes.VoidDType +class Structured(ZDType[np.dtypes.VoidDType[int], np.void]): + dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] _zarr_v3_name = "structured" - fields: tuple[tuple[str, DTypeWrapper[Any, Any]], ...] 
+ fields: tuple[tuple[str, ZDType[_BaseDType, _BaseScalar]], ...] def default_value(self) -> np.void: return self.cast_value(0) def cast_value(self, value: object) -> np.void: - return cast(np.void, np.array([value], dtype=self.to_dtype())[0]) + return cast("np.void", np.array([value], dtype=self.to_dtype())[0]) @classmethod - def check_dtype(cls, dtype: np.dtypes.DTypeLike) -> TypeGuard[np.dtypes.VoidDType]: + def check_dtype(cls, dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: """ Check that this dtype is a numpy structured dtype @@ -764,54 +1364,90 @@ def check_dtype(cls, dtype: np.dtypes.DTypeLike) -> TypeGuard[np.dtypes.VoidDTyp return super().check_dtype(dtype) and dtype.fields is not None @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType) -> Self: - from zarr.core.dtype import get_data_type_from_numpy + def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType[int]) -> Self: + from zarr.core.dtype import get_data_type_from_native_dtype - fields: list[tuple[str, DTypeWrapper[Any, Any]]] = [] + fields: list[tuple[str, ZDType[Any, Any]]] = [] if dtype.fields is None: raise ValueError("numpy dtype has no fields") - for key, (dtype_instance, _) in dtype.fields.items(): - dtype_wrapped = get_data_type_from_numpy(dtype_instance) + # fields of a structured numpy dtype are either 2-tuples or 3-tuples. we only + # care about the first element in either case. 
+ for key, (dtype_instance, *_) in dtype.fields.items(): + dtype_wrapped = get_data_type_from_native_dtype(dtype_instance) fields.append((key, dtype_wrapped)) return cls(fields=tuple(fields)) - def get_name(self, zarr_format: ZarrFormat) -> str | list[tuple[str, str]]: + def to_json(self, zarr_format: ZarrFormat) -> JSON: + fields = [ + (f_name, f_dtype.to_json(zarr_format=zarr_format)) for f_name, f_dtype in self.fields + ] if zarr_format == 2: - return [[k, d.get_name(zarr_format=2)] for k, d in self.fields] - return self._zarr_v3_name - - def to_dict(self) -> dict[str, JSON]: - base_dict = {"name": self.get_name(zarr_format=3)} - field_configs = [(f_name, f_dtype.to_dict()) for f_name, f_dtype in self.fields] - base_dict["configuration"] = {"fields": field_configs} - return base_dict + return fields + elif zarr_format == 3: + base_dict = {"name": self._zarr_v3_name} + base_dict["configuration"] = {"fields": fields} # type: ignore[assignment] + return cast("JSON", base_dict) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") @classmethod - def check_dict(cls, data: JSON) -> TypeGuard[JSON]: - return ( - isinstance(data, dict) - and "name" in data - and "configuration" in data - and "fields" in data["configuration"] - ) + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[dict[str, JSON] | list[Any]]: + # the actual JSON form is recursive and hard to annotate, so we give up and do + # list[Any] for now + if zarr_format == 2: + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and all( + not isinstance(field, str) and isinstance(field, Sequence) and len(field) == 2 + for field in data + ) + ) + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and "configuration" in data + and isinstance(data["configuration"], dict) + and "fields" in data["configuration"] + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") @classmethod - def from_dict(cls, data: 
dict[str, JSON]) -> Self: - if cls.check_dict(data): - from zarr.core.dtype import get_data_type_from_dict - - fields = tuple( - (f_name, get_data_type_from_dict(f_dtype)) - for f_name, f_dtype in data["configuration"]["fields"] - ) - return cls(fields=fields) + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + from zarr.core.dtype import get_data_type_from_json + + if cls.check_json(data, zarr_format=zarr_format): + if zarr_format == 2: + # structured dtypes are constructed directly from a list of lists + return cls( + fields=tuple( # type: ignore[misc] + (f_name, get_data_type_from_json(f_dtype, zarr_format=zarr_format)) + for f_name, f_dtype in data + ) + ) + elif zarr_format == 3: # noqa: SIM102 + if isinstance(data, dict) and "configuration" in data: + config = data["configuration"] + if isinstance(config, dict) and "fields" in config: + meta_fields = config["fields"] + fields = tuple( + (f_name, get_data_type_from_json(f_dtype, zarr_format=zarr_format)) + for f_name, f_dtype in meta_fields + ) + return cls(fields=fields) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") - def to_dtype(self) -> np.dtypes.VoidDType: - return cast(np.void, np.dtype([(key, dtype.to_dtype()) for (key, dtype) in self.fields])) + def to_dtype(self) -> np.dtypes.VoidDType[int]: + return cast( + "np.dtypes.VoidDType[int]", + np.dtype([(key, dtype.to_dtype()) for (key, dtype) in self.fields]), + ) def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: return bytes_to_json(data.tobytes(), zarr_format) @@ -822,3 +1458,69 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: as_bytes = bytes_from_json(data, zarr_format=zarr_format) dtype = self.to_dtype() return cast(np.void, np.array([as_bytes], dtype=dtype.str).view(dtype)[0]) + + +def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: + 
""" + Convert an endianness literal to its numpy string representation. + + Parameters + ---------- + endianness : Endianness or None + The endianness to convert. + + Returns + ------- + Literal[">", "<", "=", "|"] + The numpy string representation of the endianness. + + Raises + ------ + ValueError + If the endianness is invalid. + """ + match endianness: + case "little": + return "<" + case "big": + return ">" + case "native": + return "=" + case None: + return "|" + raise ValueError( + f"Invalid endianness: {endianness}. Expected one of {get_args(Endianness)} or None" + ) + + +def endianness_from_numpy_str(endianness: EndiannessNumpy) -> Endianness | None: + """ + Convert a numpy endianness string literal to a human-readable literal value. + + Parameters + ---------- + endianness : Literal[">", "<", "=", "|"] + The numpy string representation of the endianness. + + Returns + ------- + Endianness or None + The human-readable representation of the endianness. + + Raises + ------ + ValueError + If the endianness is invalid. + """ + match endianness: + case "<": + return "little" + case ">": + return "big" + case "=": + return "native" + case "|": + return None + raise ValueError( + f"Invalid endianness: {endianness}. 
Expected one of {get_args(EndiannessNumpy)}" + ) diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 1dbf22c3c2..2c4910338e 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -2,7 +2,7 @@ import base64 from collections.abc import Sequence -from typing import TYPE_CHECKING, Any, Literal, TypeGuard, cast, get_args +from typing import TYPE_CHECKING, Any, Literal, TypeGuard, cast import numpy as np @@ -11,81 +11,12 @@ from zarr.core.dtype._numpy import DateUnit, TimeUnit Endianness = Literal["little", "big", "native"] -EndiannessNumpy = Literal[">", "<", "=", "|"] JSONFloat = float | Literal["NaN", "Infinity", "-Infinity"] -_NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") - class DataTypeValidationError(ValueError): ... -def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: - """ - Convert an endianness literal to its numpy string representation. - - Parameters - ---------- - endianness : Endianness or None - The endianness to convert. - - Returns - ------- - Literal[">", "<", "=", "|"] - The numpy string representation of the endianness. - - Raises - ------ - ValueError - If the endianness is invalid. - """ - match endianness: - case "little": - return "<" - case "big": - return ">" - case "native": - return "=" - case None: - return "|" - raise ValueError( - f"Invalid endianness: {endianness}. Expected one of {get_args(Endianness)} or None" - ) - - -def endianness_from_numpy_str(endianness: EndiannessNumpy) -> Endianness | None: - """ - Convert a numpy endianness string literal to a human-readable literal value. - - Parameters - ---------- - endianness : Literal[">", "<", "=", "|"] - The numpy string representation of the endianness. - - Returns - ------- - Endianness or None - The human-readable representation of the endianness. - - Raises - ------ - ValueError - If the endianness is invalid. 
- """ - match endianness: - case "<": - return "little" - case ">": - return "big" - case "=": - return "native" - case "|": - return None - raise ValueError( - f"Invalid endianness: {endianness}. Expected one of {get_args(EndiannessNumpy)}" - ) - - def check_json_bool(data: JSON) -> TypeGuard[bool]: """ Check if a JSON value is a boolean. diff --git a/src/zarr/core/dtype/registry.py b/src/zarr/core/dtype/registry.py index d4f1f03258..0d07ab2b9d 100644 --- a/src/zarr/core/dtype/registry.py +++ b/src/zarr/core/dtype/registry.py @@ -1,20 +1,22 @@ from __future__ import annotations from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any, Self +from typing import TYPE_CHECKING, Self from zarr.core.dtype.common import DataTypeValidationError if TYPE_CHECKING: from importlib.metadata import EntryPoint - from zarr.core.common import JSON - from zarr.core.dtype.wrapper import DTypeWrapper, TDType + from zarr.core.common import JSON, ZarrFormat + from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar @dataclass(frozen=True, kw_only=True) class DataTypeRegistry: - contents: dict[str, type[DTypeWrapper[Any, Any]]] = field(default_factory=dict, init=False) + contents: dict[str, type[ZDType[_BaseDType, _BaseScalar]]] = field( + default_factory=dict, init=False + ) lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) def lazy_load(self) -> None: @@ -23,15 +25,15 @@ def lazy_load(self) -> None: self.lazy_load_list.clear() - def register(self: Self, key: str, cls: type[DTypeWrapper[Any, Any]]) -> None: + def register(self: Self, key: str, cls: type[ZDType[_BaseDType, _BaseScalar]]) -> None: # don't register the same dtype twice if key not in self.contents or self.contents[key] != cls: self.contents[key] = cls - def get(self, key: str) -> type[DTypeWrapper[Any, Any]]: + def get(self, key: str) -> type[ZDType[_BaseDType, _BaseScalar]]: return self.contents[key] - def match_dtype(self, dtype: TDType) -> DTypeWrapper[Any, 
Any]: + def match_dtype(self, dtype: _BaseDType) -> ZDType[_BaseDType, _BaseScalar]: self.lazy_load() for val in self.contents.values(): try: @@ -40,11 +42,11 @@ def match_dtype(self, dtype: TDType) -> DTypeWrapper[Any, Any]: pass raise ValueError(f"No data type wrapper found that matches dtype '{dtype}'") - def match_json(self, data: JSON) -> DTypeWrapper[Any, Any]: + def match_json(self, data: JSON, zarr_format: ZarrFormat) -> ZDType[_BaseDType, _BaseScalar]: self.lazy_load() for val in self.contents.values(): try: - return val.from_dict(data) + return val.from_json(data, zarr_format=zarr_format) except DataTypeValidationError: pass raise ValueError(f"No data type wrapper found that matches {data}") diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index dc3a0cc5d2..8707c3cda0 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -2,25 +2,30 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, ClassVar, Generic, Self, TypeGuard, TypeVar, cast +from typing import TYPE_CHECKING, ClassVar, Generic, Self, TypeGuard, TypeVar import numpy as np -from zarr.abc.metadata import Metadata from zarr.core.dtype.common import DataTypeValidationError if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat -TScalar = TypeVar("TScalar", bound=np.generic | str) +# This the upper bound for the scalar types we support. It's numpy scalars + str, +# because the new variable-length string dtype in numpy does not have a corresponding scalar type +_BaseScalar = np.generic | str +# This is the bound for the dtypes that we support. If we support non-numpy dtypes, +# then this bound will need to be widened. 
+_BaseDType = np.dtype[np.generic] +TScalar = TypeVar("TScalar", bound=_BaseScalar) # TODO: figure out an interface or protocol that non-numpy dtypes can use -TDType = TypeVar("TDType", bound=np.dtype[Any]) +TDType = TypeVar("TDType", bound=_BaseDType) @dataclass(frozen=True, kw_only=True) -class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): +class ZDType(Generic[TDType, TScalar], ABC): """ - Abstract base class for wrapping numpy dtypes. + Abstract base class for wrapping native array data types, e.g. numpy dtypes Attributes ---------- @@ -32,13 +37,30 @@ class DTypeWrapper(Generic[TDType, TScalar], ABC, Metadata): have names that depend on their configuration. """ - # this class will create a numpy dtype + # this class will create a native data type # mypy currently disallows class variables to contain type parameters - # but it seems like it should be OK for us to use it here: + # but it seems OK for us to use it here: # https://github.com/python/typing/discussions/1424#discussioncomment-7989934 dtype_cls: ClassVar[type[TDType]] # type: ignore[misc] _zarr_v3_name: ClassVar[str] + @classmethod + def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[TDType]: + """ + Check that a data type matches the dtype_cls class attribute. Used as a type guard. + + Parameters + ---------- + dtype : TDType + The dtype to check. + + Returns + ------- + Bool + True if the dtype matches, False otherwise. + """ + return type(dtype) is cls.dtype_cls + @classmethod def from_dtype(cls: type[Self], dtype: TDType) -> Self: """ @@ -81,7 +103,7 @@ def _from_dtype_unsafe(cls: type[Self], dtype: TDType) -> Self: Self The wrapped dtype. """ - raise NotImplementedError + ... @abstractmethod def to_dtype(self: Self) -> TDType: @@ -93,26 +115,7 @@ def to_dtype(self: Self) -> TDType: TDType The unwrapped dtype. """ - raise NotImplementedError - - def cast_value(self: Self, value: object) -> TScalar: - """ - Cast a value to an instance of the scalar type. 
- This implementation assumes a numpy-style dtype class that has a - ``type`` method for casting scalars. Non-numpy dtypes will need to - override this method. - - Parameters - ---------- - value : object - The value to cast. - - Returns - ------- - TScalar - The cast value. - """ - return cast(TScalar, self.to_dtype().type(value)) + ... @abstractmethod def default_value(self) -> TScalar: @@ -129,24 +132,8 @@ def default_value(self) -> TScalar: ... @classmethod - def check_dtype(cls: type[Self], dtype: TDType) -> TypeGuard[TDType]: - """ - Check that a data type matches the dtype_cls class attribute. Used as a type guard. - - Parameters - ---------- - dtype : TDType - The dtype to check. - - Returns - ------- - Bool - True if the dtype matches, False otherwise. - """ - return type(dtype) is cls.dtype_cls - - @classmethod - def check_dict(cls: type[Self], data: dict[str, JSON]) -> TypeGuard[dict[str, JSON]]: + @abstractmethod + def check_json(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: """ Check that a JSON representation of a data type matches the dtype_cls class attribute. Used as a type guard. This base implementation checks that the input is a dictionary, @@ -158,87 +145,75 @@ def check_dict(cls: type[Self], data: dict[str, JSON]) -> TypeGuard[dict[str, JS data : JSON The JSON representation of the data type. + zarr_format : ZarrFormat + The zarr format version. + Returns ------- Bool True if the JSON representation matches, False otherwise. """ - return "name" in data and data["name"] == cls._zarr_v3_name + ... @abstractmethod - def to_dict(self) -> dict[str, JSON]: + def to_json(self, zarr_format: ZarrFormat) -> JSON: """ - Convert the wrapped data type to a dictionary. + Convert the wrapped data type to a JSON-serializable form. + + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. 
Returns ------- - dict[str, JSON] - The dictionary representation of the wrapped data type + JSON + The JSON-serializable representation of the wrapped data type """ - raise NotImplementedError + ... @classmethod - def from_dict(cls: type[Self], data: dict[str, JSON]) -> Self: + def from_json(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> Self: """ Wrap a JSON representation of a data type. Parameters ---------- - data : dict[str, JSON] + data : JSON The JSON representation of the data type. + zarr_format : ZarrFormat + The zarr format version. + Returns ------- Self The wrapped data type. """ - if cls.check_dict(data): - return cls._from_dict_unsafe(data) - raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") + if cls.check_json(data, zarr_format=zarr_format): + return cls._from_json_unsafe(data, zarr_format=zarr_format) + raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}: {data}") @classmethod - def _from_dict_unsafe(cls: type[Self], data: dict[str, JSON]) -> Self: + @abstractmethod + def _from_json_unsafe(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> Self: """ Wrap a JSON representation of a data type. Parameters ---------- - data : dict[str, JSON] + data : JSON The JSON representation of the data type. - Returns - ------- - Self - The wrapped data type. - """ - config = data.get("configuration", {}) - return cls(**config) - - def get_name(self, zarr_format: ZarrFormat) -> str: - """ - Return the name of the wrapped data type. - - Parameters - ---------- zarr_format : ZarrFormat The zarr format version. Returns ------- - str - The name of the wrapped data type. - - Notes - ----- - This is a method, rather than an attribute, because the name of the data type may depend on - parameters that are not known until a concrete data type is wrapped. - - As the names of data types vary between zarr versions, this method takes a ``zarr_format`` - parameter + Self + The wrapped data type. 
""" - if zarr_format == 2: - return self.to_dtype().str - return self._zarr_v3_name + ... @abstractmethod def to_json_value(self, data: TScalar, *, zarr_format: ZarrFormat) -> JSON: @@ -255,9 +230,9 @@ def to_json_value(self, data: TScalar, *, zarr_format: ZarrFormat) -> JSON: Returns ------- JSON - The JSON-serializable format. + The JSON-serializable form of the scalar. """ - raise NotImplementedError + ... @abstractmethod def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: @@ -274,6 +249,6 @@ def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScal Returns ------- TScalar - The numpy scalar. + The native scalar value. """ - raise NotImplementedError + ... diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 94c69602af..d26ca52353 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -3,13 +3,13 @@ import base64 import warnings from collections.abc import Iterable -from typing import TYPE_CHECKING, TypedDict, cast +from typing import TYPE_CHECKING, TypedDict import numcodecs.abc from zarr.abc.metadata import Metadata -from zarr.core.dtype import get_data_type_from_numpy -from zarr.core.dtype.wrapper import DTypeWrapper +from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.dtype.wrapper import TDType, TScalar, ZDType, _BaseDType, _BaseScalar if TYPE_CHECKING: from typing import Any, Literal, Self @@ -45,7 +45,7 @@ class ArrayV2MetadataDict(TypedDict): class ArrayV2Metadata(Metadata): shape: ChunkCoords chunks: ChunkCoords - dtype: DTypeWrapper[Any, Any] + dtype: ZDType[_BaseDType, _BaseScalar] fill_value: int | float | str | bytes | None = 0 order: MemoryOrder = "C" filters: tuple[numcodecs.abc.Codec, ...] 
| None = None @@ -58,7 +58,7 @@ def __init__( self, *, shape: ChunkCoords, - dtype: DTypeWrapper[Any, Any], + dtype: ZDType[TDType, TScalar], chunks: ChunkCoords, fill_value: Any, order: MemoryOrder, @@ -73,7 +73,7 @@ def __init__( shape_parsed = parse_shapelike(shape) chunks_parsed = parse_shapelike(chunks) # TODO: remove this - if not isinstance(dtype, DTypeWrapper): + if not isinstance(dtype, ZDType): raise TypeError compressor_parsed = parse_compressor(compressor) order_parsed = parse_indexing_order(order) @@ -122,7 +122,7 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: _data = data.copy() # check that the zarr_format attribute is correct _ = parse_zarr_format(_data.pop("zarr_format")) - dtype = get_data_type_from_numpy(_data["dtype"]) + dtype = get_data_type_from_native_dtype(_data["dtype"]) _data["dtype"] = dtype if dtype.to_dtype().kind in "SV": fill_value_encoded = _data.get("fill_value") @@ -163,6 +163,10 @@ def to_dict(self) -> dict[str, JSON]: if zarray_dict["filters"] is not None: raw_filters = zarray_dict["filters"] + # TODO: remove this when we can stratically type the output JSON data structure + # entirely + if not isinstance(raw_filters, list | tuple): + raise TypeError("Invalid type for filters. Expected a list or tuple.") new_filters = [] for f in raw_filters: if isinstance(f, numcodecs.abc.Codec): @@ -172,13 +176,10 @@ def to_dict(self) -> dict[str, JSON]: zarray_dict["filters"] = new_filters if self.fill_value is not None: - # There's a relationship between self.dtype and self.fill_value - # that mypy isn't aware of. The fact that we have S or V dtype here - # means we should have a bytes-type fill_value. 
- fill_value = self.dtype.to_json_value(self.fill_value, zarr_format=2) + fill_value = self.dtype.to_json_value(self.fill_value, zarr_format=2) # type: ignore[arg-type] zarray_dict["fill_value"] = fill_value - zarray_dict["dtype"] = self.dtype.get_name(zarr_format=2) + zarray_dict["dtype"] = self.dtype.to_json(zarr_format=2) return zarray_dict @@ -312,22 +313,3 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any: raise ValueError(msg) from e return fill_value - - -def _default_compressor( - dtype: DTypeWrapper[Any, Any], -) -> dict[str, JSON] | None: - """Get the default filters and compressor for a dtype. - - https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html - """ - default_compressor = config.get("array.v2_default_compressor") - return cast(dict[str, JSON] | None, default_compressor.get(dtype.kind, None)) - - -def _default_filters( - dtype: DTypeWrapper, -) -> list[dict[str, JSON]] | None: - """Get the default filters and compressor for a dtype.""" - default_filters = config.get("array.v2_default_filters") - return cast(list[dict[str, JSON]] | None, default_filters.get(dtype.kind, None)) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 2c6e65037e..117bb3c573 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -5,9 +5,9 @@ from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype from zarr.core.dtype import ( - DTypeWrapper, VariableLengthString, - get_data_type_from_dict, + ZDType, + get_data_type_from_json, ) if TYPE_CHECKING: @@ -17,6 +17,7 @@ from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import JSON, ChunkCoords + from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar import json @@ -95,7 +96,7 @@ def validate_array_bytes_codec(codecs: tuple[Codec, ...]) -> ArrayBytesCodec: return abcs[0] -def validate_codecs(codecs: tuple[Codec, ...], dtype: 
DTypeWrapper[Any, Any]) -> None: +def validate_codecs(codecs: tuple[Codec, ...], dtype: ZDType[_BaseDType, _BaseScalar]) -> None: """Check that the codecs are valid for the given dtype""" from zarr.codecs.sharding import ShardingCodec @@ -234,7 +235,7 @@ class ArrayV3MetadataDict(TypedDict): @dataclass(frozen=True, kw_only=True) class ArrayV3Metadata(Metadata): shape: ChunkCoords - data_type: DTypeWrapper[Any, Any] + data_type: ZDType[_BaseDType, _BaseScalar] chunk_grid: ChunkGrid chunk_key_encoding: ChunkKeyEncoding fill_value: Any @@ -249,7 +250,7 @@ def __init__( self, *, shape: Iterable[int], - data_type: DTypeWrapper[Any, Any], + data_type: ZDType[_BaseDType, _BaseScalar], chunk_grid: dict[str, JSON] | ChunkGrid, chunk_key_encoding: ChunkKeyEncodingLike, fill_value: object, @@ -263,7 +264,7 @@ def __init__( """ # TODO: remove this - if not isinstance(data_type, DTypeWrapper): + if not isinstance(data_type, ZDType): raise TypeError shape_parsed = parse_shapelike(shape) chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) @@ -276,7 +277,7 @@ def __init__( array_spec = ArraySpec( shape=shape_parsed, - dtype=data_type.to_dtype(), + dtype=data_type, fill_value=fill_value_parsed, config=ArrayConfig.from_dict({}), # TODO: config is not needed here. prototype=default_buffer_prototype(), # TODO: prototype is not needed here. 
@@ -310,9 +311,7 @@ def _validate_metadata(self) -> None: if self.fill_value is None: raise ValueError("`fill_value` is required.") for codec in self.codecs: - codec.validate( - shape=self.shape, dtype=self.data_type.to_dtype(), chunk_grid=self.chunk_grid - ) + codec.validate(shape=self.shape, dtype=self.data_type, chunk_grid=self.chunk_grid) @property def ndim(self) -> int: @@ -380,10 +379,7 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: _ = parse_node_type_array(_data.pop("node_type")) data_type_json = _data.pop("data_type") - if isinstance(data_type_json, str): - data_type = get_data_type_from_dict({"name": data_type_json}) - else: - data_type = get_data_type_from_dict(data_type_json) + data_type = get_data_type_from_json(data_type_json, zarr_format=3) # check that the fill value is consistent with the data type fill_value_parsed = data_type.from_json_value(_data.pop("fill_value"), zarr_format=3) @@ -408,9 +404,15 @@ def to_dict(self) -> dict[str, JSON]: # the metadata document if out_dict["dimension_names"] is None: out_dict.pop("dimension_names") - # if data_type has no configuration, we just serialize the name - if "configuration" not in out_dict["data_type"]: - out_dict["data_type"] = out_dict["data_type"]["name"] + + # TODO: replace the `to_dict` / `from_dict` on the `Metadata`` class with + # to_json, from_json, and have ZDType inherit from `Metadata` + # until then, we have this hack here + dtype_meta = out_dict["data_type"] + + if isinstance(dtype_meta, ZDType): + out_dict["data_type"] = dtype_meta.to_json(zarr_format=3) + return out_dict def update_shape(self, shape: ChunkCoords) -> Self: diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 2eef703448..6c2e8f7762 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -15,7 +15,7 @@ from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding from zarr.core.common import ZarrFormat 
-from zarr.core.dtype import parse_data_type +from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata from zarr.core.sync import sync from zarr.storage import MemoryStore, StoreLike @@ -135,7 +135,7 @@ def array_metadata( ndim = len(shape) chunk_shape = draw(array_shapes(min_dims=ndim, max_dims=ndim)) np_dtype = draw(v3_dtypes()) - dtype = parse_data_type(np_dtype) + dtype = get_data_type_from_native_dtype(np_dtype) fill_value = draw(npst.from_dtype(np_dtype)) if zarr_format == 2: return ArrayV2Metadata( diff --git a/tests/conftest.py b/tests/conftest.py index 5e17c82a37..b2c106f2e2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,7 +20,7 @@ from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition from zarr.core.common import JSON, parse_shapelike from zarr.core.config import config as zarr_config -from zarr.core.dtype import get_data_type_from_numpy +from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync @@ -253,7 +253,7 @@ def create_array_metadata( """ Create array metadata """ - dtype_parsed = get_data_type_from_numpy(dtype) + dtype_parsed = get_data_type_from_native_dtype(dtype) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format diff --git a/tests/test_array.py b/tests/test_array.py index b2f21d6562..aa61860fa1 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -38,7 +38,7 @@ from zarr.core.buffer.cpu import NDBuffer from zarr.core.chunk_grids import _auto_partition from zarr.core.common import JSON, MemoryOrder, ZarrFormat -from zarr.core.dtype import get_data_type_from_numpy +from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv from 
zarr.core.sync import sync @@ -1035,7 +1035,7 @@ async def test_v2_chunk_encoding( filters=filters, ) filters_expected, compressor_expected = _parse_chunk_encoding_v2( - filters=filters, compressor=compressors, dtype=get_data_type_from_numpy(dtype) + filters=filters, compressor=compressors, dtype=get_data_type_from_native_dtype(dtype) ) assert arr.metadata.zarr_format == 2 # guard for mypy assert arr.metadata.compressor == compressor_expected @@ -1056,7 +1056,7 @@ async def test_default_filters_compressors( """ Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with ``filters`` and ``compressors`` unspecified. """ - zdtype = get_data_type_from_numpy(dtype_str) + zdtype = get_data_type_from_native_dtype(dtype_str) arr = await create_array( store=store, dtype=dtype_str, diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py index ee3415a501..b1508953ea 100644 --- a/tests/test_codecs/test_vlen.py +++ b/tests/test_codecs/test_vlen.py @@ -8,7 +8,7 @@ from zarr.abc.codec import Codec from zarr.abc.store import Store from zarr.codecs import ZstdCodec -from zarr.core.dtype import get_data_type_from_numpy +from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING from zarr.storage import StorePath @@ -52,12 +52,12 @@ def test_vlen_string( else: a[:, :] = data assert np.array_equal(data, a[:, :]) - assert a.metadata.data_type == get_data_type_from_numpy(data.dtype) + assert a.metadata.data_type == get_data_type_from_native_dtype(data.dtype) assert a.dtype == data.dtype # test round trip b = Array.open(sp) assert isinstance(b.metadata, ArrayV3Metadata) # needed for mypy assert np.array_equal(data, b[:, :]) - assert b.metadata.data_type == get_data_type_from_numpy(data.dtype) + assert b.metadata.data_type == get_data_type_from_native_dtype(data.dtype) assert a.dtype == data.dtype diff --git 
a/tests/test_config.py b/tests/test_config.py index 34ecfdc119..8d6e0a53ed 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -24,7 +24,7 @@ from zarr.core.buffer import NDBuffer from zarr.core.codec_pipeline import BatchedCodecPipeline from zarr.core.config import BadConfigError, config -from zarr.core.dtype import get_data_type_from_numpy +from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.indexing import SelectorTuple from zarr.registry import ( fully_qualified_name, @@ -57,15 +57,15 @@ def test_config_defaults_set() -> None: "v2_default_compressor": {"default": {"id": "zstd", "level": 0, "checksum": False}}, "v2_default_filters": { "default": None, - "variable_length_utf8": [{"id": "vlen-utf8"}], - "fixed_length_ucs4": [{"id": "vlen-utf8"}], - "fixed_length_ascii": [{"id": "vlen-bytes"}], + "numpy__variable_length_utf8": [{"id": "vlen-utf8"}], + "numpy__fixed_length_ucs4": [{"id": "vlen-utf8"}], + "numpy__fixed_length_ascii": [{"id": "vlen-bytes"}], }, "v3_default_filters": {"default": []}, "v3_default_serializer": { "default": {"name": "bytes", "configuration": {"endian": "little"}}, - "variable_length_utf8": {"name": "vlen-utf8"}, - "fixed_length_ucs4": {"name": "vlen-utf8"}, + "numpy__variable_length_utf8": {"name": "vlen-utf8"}, + "numpy__fixed_length_ucs4": {"name": "vlen-utf8"}, "r*": {"name": "vlen-bytes"}, }, "v3_default_compressors": { @@ -302,10 +302,10 @@ async def test_default_codecs(dtype: str) -> None: """ Test that the default compressors are sensitive to the current setting of the config. 
""" - zdtype = get_data_type_from_numpy(dtype) + zdtype = get_data_type_from_native_dtype(dtype) expected_compressors = (GzipCodec(),) new_conf = { - f"array.v3_default_compressors.{zdtype._zarr_v3_name}": [ + f"array.v3_default_compressors.{zdtype._zarr_v3_name.replace('.', '__')}": [ c.to_dict() for c in expected_compressors ] } diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index 508519e696..c9ab06f1e2 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -504,7 +504,7 @@ async def test_consolidated_metadata_backwards_compatibility( async def test_consolidated_metadata_v2(self): store = zarr.storage.MemoryStore() g = await AsyncGroup.from_store(store, attributes={"key": "root"}, zarr_format=2) - dtype = parse_data_type("uint8") + dtype = parse_data_type("uint8", zarr_format=2) await g.create_array(name="a", shape=(1,), attributes={"key": "a"}, dtype=dtype) g1 = await g.create_group(name="g1", attributes={"key": "g1"}) await g1.create_group(name="g2", attributes={"key": "g2"}) diff --git a/tests/test_metadata/test_dtype.py b/tests/test_metadata/test_dtype.py index ee19cdf845..db575ee16a 100644 --- a/tests/test_metadata/test_dtype.py +++ b/tests/test_metadata/test_dtype.py @@ -1,14 +1,19 @@ from __future__ import annotations -from typing import Any, get_args +import re +from typing import TYPE_CHECKING, Any, get_args + +if TYPE_CHECKING: + from zarr.core.common import ZarrFormat + from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar import numpy as np import pytest from zarr.core.dtype import ( DTYPE, - DTypeWrapper, VariableLengthString, + ZDType, data_type_registry, ) from zarr.core.dtype._numpy import ( @@ -16,9 +21,9 @@ Complex64, Complex128, DateTime64, - FixedLengthAsciiString, + FixedLengthAscii, FixedLengthBytes, - FixedLengthUnicodeString, + FixedLengthUnicode, Float16, Float32, Float64, @@ -37,7 +42,7 @@ @pytest.fixture -def dtype_registry() -> 
DataTypeRegistry: +def data_type_registry_fixture() -> DataTypeRegistry: return DataTypeRegistry() @@ -66,15 +71,15 @@ def dtype_registry() -> DataTypeRegistry: (Float64, "float64"), (Complex64, "complex64"), (Complex128, "complex128"), - (FixedLengthUnicodeString, "U"), - (FixedLengthAsciiString, "S"), + (FixedLengthUnicode, "U"), + (FixedLengthAscii, "S"), (FixedLengthBytes, "V"), (VariableLengthString, VLEN_STRING_CODE), (Structured, np.dtype([("a", np.float64), ("b", np.int8)])), (DateTime64, "datetime64[s]"), ], ) -def test_wrap(wrapper_cls: type[DTypeWrapper[Any, Any]], np_dtype: np.dtype | str) -> None: +def test_wrap(wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], np_dtype: np.dtype | str) -> None: """ Test that the wrapper class has the correct dtype class bound to the dtype_cls variable Test that the ``wrap`` method produces an instance of the wrapper class @@ -92,13 +97,13 @@ def test_wrap(wrapper_cls: type[DTypeWrapper[Any, Any]], np_dtype: np.dtype | st @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) -def test_dict_serialization(wrapper_cls: DTYPE) -> None: +def test_dict_serialization(wrapper_cls: DTYPE, zarr_format: ZarrFormat) -> None: if issubclass(wrapper_cls, Structured): instance = wrapper_cls(fields=((("a", Bool()),))) else: instance = wrapper_cls() - as_dict = instance.to_dict() - assert wrapper_cls.from_dict(as_dict) == instance + as_dict = instance.to_json(zarr_format=zarr_format) + assert wrapper_cls.from_json(as_dict, zarr_format=zarr_format) == instance @pytest.mark.parametrize( @@ -118,9 +123,9 @@ def test_dict_serialization(wrapper_cls: DTYPE) -> None: (Float64(), np.float64(0)), (Complex64(), np.complex64(0)), (Complex128(), np.complex128(0)), - (FixedLengthAsciiString(length=3), np.bytes_(b"")), + (FixedLengthAscii(length=3), np.bytes_(b"")), (FixedLengthBytes(length=3), np.void(b"\x00\x00\x00")), - (FixedLengthUnicodeString(length=3), np.str_("")), + (FixedLengthUnicode(length=3), np.str_("")), ( Structured(fields=(("a", 
Float64()), ("b", Int8()))), np.array([0], dtype=[("a", np.float64), ("b", np.int8)])[0], @@ -129,7 +134,9 @@ def test_dict_serialization(wrapper_cls: DTYPE) -> None: (DateTime64(unit="s"), np.datetime64("NaT")), ], ) -def test_default_value(wrapper: type[DTypeWrapper[Any, Any]], expected_default: Any) -> None: +def test_default_value( + wrapper: type[ZDType[_BaseDType, _BaseScalar]], expected_default: Any +) -> None: """ Test that the default_value method is correctly set for each dtype wrapper. """ @@ -156,15 +163,15 @@ def test_default_value(wrapper: type[DTypeWrapper[Any, Any]], expected_default: (Float64(), np.float64(42.0), 42.0), (Complex64(), np.complex64(42.0 + 1.0j), (42.0, 1.0)), (Complex128(), np.complex128(42.0 + 1.0j), (42.0, 1.0)), - (FixedLengthAsciiString(length=4), np.bytes_(b"test"), "dGVzdA=="), + (FixedLengthAscii(length=4), np.bytes_(b"test"), "dGVzdA=="), (FixedLengthBytes(length=4), np.void(b"test"), "dGVzdA=="), - (FixedLengthUnicodeString(length=4), np.str_("test"), "test"), + (FixedLengthUnicode(length=4), np.str_("test"), "test"), (VariableLengthString(), "test", "test"), (DateTime64(unit="s"), np.datetime64("2021-01-01T00:00:00", "s"), 1609459200), ], ) def test_to_json_value_v2( - wrapper: type[DTypeWrapper[Any, Any]], input_value: Any, expected_json: Any + wrapper: type[ZDType[_BaseDType, _BaseScalar]], input_value: Any, expected_json: Any ) -> None: """ Test the to_json_value method for each dtype wrapper for zarr v2 @@ -189,15 +196,15 @@ def test_to_json_value_v2( (Float64(), 42.0, np.float64(42.0)), (Complex64(), (42.0, 1.0), np.complex64(42.0 + 1.0j)), (Complex128(), (42.0, 1.0), np.complex128(42.0 + 1.0j)), - (FixedLengthAsciiString(length=4), "dGVzdA==", np.bytes_(b"test")), + (FixedLengthAscii(length=4), "dGVzdA==", np.bytes_(b"test")), (FixedLengthBytes(length=4), "dGVzdA==", np.void(b"test")), - (FixedLengthUnicodeString(length=4), "test", np.str_("test")), + (FixedLengthUnicode(length=4), "test", np.str_("test")), 
(VariableLengthString(), "test", "test"), (DateTime64(unit="s"), 1609459200, np.datetime64("2021-01-01T00:00:00", "s")), ], ) def test_from_json_value( - wrapper: type[DTypeWrapper[Any, Any]], json_value: Any, expected_value: Any + wrapper: type[ZDType[_BaseDType, _BaseScalar]], json_value: Any, expected_value: Any ) -> None: """ Test the from_json_value method for each dtype wrapper. @@ -207,43 +214,45 @@ def test_from_json_value( class TestRegistry: @staticmethod - def test_register(dtype_registry: DataTypeRegistry) -> None: + def test_register(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that registering a dtype in a data type registry works. """ - dtype_registry.register(Bool._zarr_v3_name, Bool) - assert dtype_registry.get(Bool._zarr_v3_name) == Bool - assert isinstance(dtype_registry.match_dtype(np.dtype("bool")), Bool) + data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) + assert data_type_registry_fixture.get(Bool._zarr_v3_name) == Bool + assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), Bool) @staticmethod - def test_override(dtype_registry: DataTypeRegistry) -> None: + def test_override(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that registering a new dtype with the same name works (overriding the previous one). 
""" - dtype_registry.register(Bool._zarr_v3_name, Bool) + data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) class NewBool(Bool): def default_value(self) -> np.bool_: return np.True_ - dtype_registry.register(NewBool._zarr_v3_name, NewBool) - assert isinstance(dtype_registry.match_dtype(np.dtype("bool")), NewBool) + data_type_registry_fixture.register(NewBool._zarr_v3_name, NewBool) + assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), NewBool) @staticmethod @pytest.mark.parametrize( - ("wrapper_cls", "dtype_str"), [(Bool, "bool"), (FixedLengthUnicodeString, "|U4")] + ("wrapper_cls", "dtype_str"), [(Bool, "bool"), (FixedLengthUnicode, "|U4")] ) def test_match_dtype( - dtype_registry: DataTypeRegistry, wrapper_cls: type[DTypeWrapper[Any, Any]], dtype_str: str + data_type_registry_fixture: DataTypeRegistry, + wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], + dtype_str: str, ) -> None: """ Test that match_dtype resolves a numpy dtype into an instance of the correspond wrapper for that dtype. """ - dtype_registry.register(wrapper_cls._zarr_v3_name, wrapper_cls) - assert isinstance(dtype_registry.match_dtype(np.dtype(dtype_str)), wrapper_cls) + data_type_registry_fixture.register(wrapper_cls._zarr_v3_name, wrapper_cls) + assert isinstance(data_type_registry_fixture.match_dtype(np.dtype(dtype_str)), wrapper_cls) @staticmethod - def test_unregistered_dtype(dtype_registry: DataTypeRegistry) -> None: + def test_unregistered_dtype(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that match_dtype raises an error if the dtype is not registered. 
""" @@ -251,14 +260,16 @@ def test_unregistered_dtype(dtype_registry: DataTypeRegistry) -> None: with pytest.raises( ValueError, match=f"No data type wrapper found that matches dtype '{outside_dtype}'" ): - dtype_registry.match_dtype(np.dtype(outside_dtype)) + data_type_registry_fixture.match_dtype(np.dtype(outside_dtype)) with pytest.raises(KeyError): - dtype_registry.get(outside_dtype) + data_type_registry_fixture.get(outside_dtype) @staticmethod @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) - def test_registered_dtypes(wrapper_cls: DTypeWrapper[Any, Any]) -> None: + def test_registered_dtypes( + wrapper_cls: ZDType[_BaseDType, _BaseScalar], zarr_format: ZarrFormat + ) -> None: """ Test that the registered dtypes can be retrieved from the registry. """ @@ -268,3 +279,40 @@ def test_registered_dtypes(wrapper_cls: DTypeWrapper[Any, Any]) -> None: instance = wrapper_cls() assert data_type_registry.match_dtype(instance.to_dtype()) == instance + assert ( + data_type_registry.match_json( + instance.to_json(zarr_format=zarr_format), zarr_format=zarr_format + ) + == instance + ) + + @staticmethod + @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) + def test_match_dtype_unique( + wrapper_cls: ZDType[_BaseDType, _BaseScalar], + data_type_registry_fixture: DataTypeRegistry, + zarr_format: ZarrFormat, + ) -> None: + """ + Test that the match_dtype method uniquely specifies a registered data type. 
We create a local registry + that excludes the data type class being tested, and ensure that an instance of the wrapped data type + fails to match anything in the registry + """ + for _cls in get_args(DTYPE): + if _cls is not wrapper_cls: + data_type_registry_fixture.register(_cls._zarr_v3_name, _cls) + + if issubclass(wrapper_cls, Structured): + instance = wrapper_cls(fields=((("a", Bool()),))) + else: + instance = wrapper_cls() + dtype_instance = instance.to_dtype() + + msg = f"No data type wrapper found that matches dtype '{dtype_instance}'" + with pytest.raises(ValueError, match=re.escape(msg)): + data_type_registry_fixture.match_dtype(dtype_instance) + + instance_dict = instance.to_json(zarr_format=zarr_format) + msg = f"No data type wrapper found that matches {instance_dict}" + with pytest.raises(ValueError, match=re.escape(msg)): + data_type_registry_fixture.match_json(instance_dict, zarr_format=zarr_format) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 23f28ab097..bd5f9be8b6 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -11,7 +11,7 @@ from zarr.core.buffer import default_buffer_prototype from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config -from zarr.core.dtype import get_data_type_from_numpy +from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.dtype._numpy import DateTime64 from zarr.core.dtype.common import complex_from_json from zarr.core.group import GroupMetadata, parse_node_type @@ -128,7 +128,7 @@ def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: as length-2 sequences """ zarr_format = 3 - dtype = get_data_type_from_numpy(dtype_str) + dtype = get_data_type_from_native_dtype(dtype_str) expected = dtype.to_dtype().type(complex(*fill_value)) observed = dtype.from_json_value(fill_value, zarr_format=zarr_format) assert observed == expected @@ -142,7 +142,7 @@ def 
test_complex_to_json_invalid(data: object, dtype_str: str) -> None: Test that parse_fill_value(fill_value, dtype) correctly rejects sequences with length not equal to 2 """ - dtype_instance = get_data_type_from_numpy(dtype_str) + dtype_instance = get_data_type_from_native_dtype(dtype_str) match = f"Invalid type: {data}. Expected a sequence of two numbers." with pytest.raises(TypeError, match=re.escape(match)): complex_from_json(data=data, dtype=dtype_instance, zarr_format=3) @@ -155,7 +155,7 @@ def test_parse_fill_value_invalid_type(fill_value: Any, dtype_str: str) -> None: Test that parse_fill_value(fill_value, dtype) raises TypeError for invalid non-sequential types. This test excludes bool because the bool constructor takes anything. """ - dtype_instance = get_data_type_from_numpy(dtype_str) + dtype_instance = get_data_type_from_native_dtype(dtype_str) with pytest.raises(TypeError, match=f"Invalid type: {fill_value}"): dtype_instance.from_json_value(fill_value, zarr_format=3) @@ -176,7 +176,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str) This test excludes bool because the bool constructor takes anything, and complex because complex values can be created from length-2 sequences. 
""" - dtype_instance = get_data_type_from_numpy(dtype_str) + dtype_instance = get_data_type_from_native_dtype(dtype_str) with pytest.raises(TypeError, match=re.escape(f"Invalid type: {fill_value}")): dtype_instance.from_json_value(fill_value, zarr_format=3) @@ -277,10 +277,12 @@ async def test_datetime_metadata(fill_value: int, precision: str) -> None: "node_type": "array", "shape": (1,), "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}}, - "data_type": dtype.to_dict(), + "data_type": dtype.to_json(zarr_format=3), "chunk_key_encoding": {"name": "default", "separator": "."}, "codecs": (BytesCodec(),), - "fill_value": dtype.to_json_value(dtype.cast_value(fill_value), zarr_format=3), + "fill_value": dtype.to_json_value( + dtype.to_dtype().type(fill_value, dtype.unit), zarr_format=3 + ), } metadata = ArrayV3Metadata.from_dict(metadata_dict) # ensure there isn't a TypeError here. From cbb159d5ee1f0ff01adf9cb02f8cef2a7715244e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 19 Mar 2025 22:33:01 +0100 Subject: [PATCH 040/130] update code examples in docs; remove native endianness --- docs/user-guide/arrays.rst | 12 +++---- docs/user-guide/consolidated_metadata.rst | 6 ++-- docs/user-guide/data_types.rst | 6 ++-- docs/user-guide/groups.rst | 4 +-- docs/user-guide/performance.rst | 10 +++--- src/zarr/core/_info.py | 5 ++- src/zarr/core/dtype/_numpy.py | 41 +++++++++++------------ src/zarr/core/dtype/common.py | 2 +- 8 files changed, 42 insertions(+), 44 deletions(-) diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst index a62b2ea0fa..f55dd00c80 100644 --- a/docs/user-guide/arrays.rst +++ b/docs/user-guide/arrays.rst @@ -182,7 +182,7 @@ which can be used to print useful diagnostics, e.g.:: >>> z.info Type : Array Zarr format : 3 - Data type : DataType.int32 + Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : C @@ -199,7 +199,7 @@ prints additional diagnostics, e.g.:: >>> z.info_complete() Type : 
Array Zarr format : 3 - Data type : DataType.int32 + Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : C @@ -286,7 +286,7 @@ Here is an example using a delta filter with the Blosc compressor:: >>> z.info Type : Array Zarr format : 3 - Data type : DataType.int32 + Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : C @@ -600,7 +600,7 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za >>> a.info_complete() Type : Array Zarr format : 3 - Data type : DataType.uint8 + Data type : uint8 Shape : (10000, 10000) Shard shape : (1000, 1000) Chunk shape : (100, 100) @@ -608,10 +608,10 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za Read-only : False Store type : LocalStore Filters : () - Serializer : BytesCodec(endian=) + Serializer : BytesCodec(endian=None) Compressors : (ZstdCodec(level=0, checksum=False),) No. bytes : 100000000 (95.4M) - No. bytes stored : 3981552 + No. bytes stored : 3981473 Storage ratio : 25.1 Shards Initialized : 100 diff --git a/docs/user-guide/consolidated_metadata.rst b/docs/user-guide/consolidated_metadata.rst index 3c015dcfca..fc410de7d4 100644 --- a/docs/user-guide/consolidated_metadata.rst +++ b/docs/user-guide/consolidated_metadata.rst @@ -47,7 +47,7 @@ that can be used.: >>> from pprint import pprint >>> pprint(dict(sorted(consolidated_metadata.items()))) {'a': ArrayV3Metadata(shape=(1,), - data_type=, + data_type=Float64(endianness='little'), chunk_grid=RegularChunkGrid(chunk_shape=(1,)), chunk_key_encoding=DefaultChunkKeyEncoding(name='default', separator='/'), @@ -60,7 +60,7 @@ that can be used.: node_type='array', storage_transformers=()), 'b': ArrayV3Metadata(shape=(2, 2), - data_type=, + data_type=Float64(endianness='little'), chunk_grid=RegularChunkGrid(chunk_shape=(2, 2)), chunk_key_encoding=DefaultChunkKeyEncoding(name='default', separator='/'), @@ -73,7 +73,7 @@ that can be used.: node_type='array', 
storage_transformers=()), 'c': ArrayV3Metadata(shape=(3, 3, 3), - data_type=, + data_type=Float64(endianness='little'), chunk_grid=RegularChunkGrid(chunk_shape=(3, 3, 3)), chunk_key_encoding=DefaultChunkKeyEncoding(name='default', separator='/'), diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index fffd622209..b964439706 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -10,9 +10,9 @@ Zarr-Python supports creating arrays with Numpy data types:: >>> import zarr >>> import numpy as np - >>> zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) + >>> z = zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) >>> z - + Unlike Numpy arrays, Zarr arrays are designed to be persisted to storage and read by Zarr implementations in different programming languages. This means Zarr data types must be interpreted correctly when clients read an array. So each Zarr data type defines a procedure for @@ -34,7 +34,7 @@ Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``st >>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] >>> assert dtype_meta == np_dtype.str # True >>> dtype_meta - `_, or "byte order", of the data type. Following Numpy's example, diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst index 4268004f70..c2a955718b 100644 --- a/docs/user-guide/groups.rst +++ b/docs/user-guide/groups.rst @@ -128,7 +128,7 @@ property. E.g.:: >>> bar.info_complete() Type : Array Zarr format : 3 - Data type : DataType.int64 + Data type : int64 Shape : (1000000,) Chunk shape : (100000,) Order : C @@ -144,7 +144,7 @@ property. 
E.g.:: >>> baz.info Type : Array Zarr format : 3 - Data type : DataType.float32 + Data type : float32 Shape : (1000, 1000) Chunk shape : (100, 100) Order : C diff --git a/docs/user-guide/performance.rst b/docs/user-guide/performance.rst index 42d830780f..5c7844f92c 100644 --- a/docs/user-guide/performance.rst +++ b/docs/user-guide/performance.rst @@ -52,7 +52,7 @@ a chunk shape is based on simple heuristics and may be far from optimal. E.g.:: >>> z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32') >>> z4.chunks - (625, 625) + (313, 625) If you know you are always going to be loading the entire array into memory, you can turn off chunks by providing ``chunks`` equal to ``shape``, in which case there @@ -91,7 +91,7 @@ To use sharding, you need to specify the ``shards`` parameter when creating the >>> z6.info Type : Array Zarr format : 3 - Data type : DataType.uint8 + Data type : uint8 Shape : (10000, 10000, 1000) Shard shape : (1000, 1000, 1000) Chunk shape : (100, 100, 100) @@ -99,7 +99,7 @@ To use sharding, you need to specify the ``shards`` parameter when creating the Read-only : False Store type : MemoryStore Filters : () - Serializer : BytesCodec(endian=) + Serializer : BytesCodec(endian=None) Compressors : (ZstdCodec(level=0, checksum=False),) No. bytes : 100000000000 (93.1G) @@ -121,7 +121,7 @@ ratios, depending on the correlation structure within the data. E.g.:: >>> c.info_complete() Type : Array Zarr format : 3 - Data type : DataType.int32 + Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : C @@ -140,7 +140,7 @@ ratios, depending on the correlation structure within the data. 
E.g.:: >>> f.info_complete() Type : Array Zarr format : 3 - Data type : DataType.int32 + Data type : int32 Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : F diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index c9637b156a..525b80c65f 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -2,11 +2,10 @@ import dataclasses import textwrap -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: import numcodecs.abc - import numpy as np from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.core.common import ZarrFormat @@ -81,7 +80,7 @@ class ArrayInfo: _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat - _data_type: np.dtype[Any] | ZDType[_BaseDType, _BaseScalar] + _data_type: ZDType[_BaseDType, _BaseScalar] _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] | None = None diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index a8bd2b5951..f8ebc807d3 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -2,6 +2,7 @@ import base64 import re +import sys from collections.abc import Sequence from dataclasses import dataclass from typing import ( @@ -40,7 +41,7 @@ if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat -EndiannessNumpy = Literal[">", "<", "=", "|"] +EndiannessNumpy = Literal[">", "<", "|", "="] @dataclass(frozen=True, kw_only=True) @@ -57,7 +58,7 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_]): """ _zarr_v3_name = "bool" - _zarr_v2_names: ClassVar[tuple[str,...]] = ("|b1",) + _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|b1",) dtype_cls = np.dtypes.BoolDType @classmethod @@ -314,7 +315,7 @@ class Int16(ZDType[np.dtypes.Int16DType, np.int16]): dtype_cls = np.dtypes.Int16DType _zarr_v3_name = "int16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i2", " Self: @@ -370,7 +371,7 @@ class 
UInt16(ZDType[np.dtypes.UInt16DType, np.uint16]): dtype_cls = np.dtypes.UInt16DType _zarr_v3_name = "uint16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u2", " Self: @@ -426,7 +427,7 @@ class Int32(ZDType[np.dtypes.Int32DType, np.int32]): dtype_cls = np.dtypes.Int32DType _zarr_v3_name = "int32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", " Self: @@ -482,7 +483,7 @@ class UInt32(ZDType[np.dtypes.UInt32DType, np.uint32]): dtype_cls = np.dtypes.UInt32DType _zarr_v3_name = "uint32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u4", " Self: @@ -538,7 +539,7 @@ class Int64(ZDType[np.dtypes.Int64DType, np.int64]): dtype_cls = np.dtypes.Int64DType _zarr_v3_name = "int64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i8", " Self: @@ -594,7 +595,7 @@ class UInt64(ZDType[np.dtypes.UInt64DType, np.uint64]): dtype_cls = np.dtypes.UInt64DType _zarr_v3_name = "uint64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u8", " Self: @@ -650,7 +651,7 @@ class Float16(ZDType[np.dtypes.Float16DType, np.float16]): dtype_cls = np.dtypes.Float16DType _zarr_v3_name = "float16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f2", " Self: @@ -706,7 +707,7 @@ class Float32(ZDType[np.dtypes.Float32DType, np.float32]): dtype_cls = np.dtypes.Float32DType _zarr_v3_name = "float32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f4", " Self: @@ -762,7 +763,7 @@ class Float64(ZDType[np.dtypes.Float64DType, np.float64]): dtype_cls = np.dtypes.Float64DType _zarr_v3_name = "float64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f8", " Self: @@ -818,7 +819,7 @@ class Complex64(ZDType[np.dtypes.Complex64DType, np.complex64]): dtype_cls = np.dtypes.Complex64DType _zarr_v3_name = "complex64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c8", " Self: @@ -876,7 +877,7 @@ class Complex128(ZDType[np.dtypes.Complex128DType, np.complex128]): dtype_cls = np.dtypes.Complex128DType _zarr_v3_name = "complex128" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c16", " Self: @@ -1079,7 +1080,7 @@ class 
FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_]): dtype_cls = np.dtypes.StrDType _zarr_v3_name = "numpy.fixed_length_ucs4" item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point - endianness: Endianness | None = "native" + endianness: Endianness | None = "little" length: int = 1 @classmethod @@ -1263,7 +1264,7 @@ class DateTime64(ZDType[np.dtypes.DateTime64DType, np.datetime64]): dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] _zarr_v3_name = "numpy.datetime64" unit: DateUnit | TimeUnit = "s" - endianness: Endianness | None = "native" + endianness: Endianness | None = "little" @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: @@ -1457,7 +1458,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: raise TypeError(f"Invalid type: {data}. Expected a string.") as_bytes = bytes_from_json(data, zarr_format=zarr_format) dtype = self.to_dtype() - return cast(np.void, np.array([as_bytes], dtype=dtype.str).view(dtype)[0]) + return cast("np.void", np.array([as_bytes], dtype=dtype.str).view(dtype)[0]) def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: @@ -1471,7 +1472,7 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: Returns ------- - Literal[">", "<", "=", "|"] + Literal[">", "<", "|"] The numpy string representation of the endianness. Raises @@ -1484,8 +1485,6 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: return "<" case "big": return ">" - case "native": - return "=" case None: return "|" raise ValueError( @@ -1513,12 +1512,12 @@ def endianness_from_numpy_str(endianness: EndiannessNumpy) -> Endianness | None: If the endianness is invalid. 
""" match endianness: + case "=": + return sys.byteorder case "<": return "little" case ">": return "big" - case "=": - return "native" case "|": return None raise ValueError( diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 2c4910338e..4e24d64ad9 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -10,7 +10,7 @@ from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype._numpy import DateUnit, TimeUnit -Endianness = Literal["little", "big", "native"] +Endianness = Literal["little", "big"] JSONFloat = float | Literal["NaN", "Infinity", "-Infinity"] From bb1186724ca31020685a69cbc9a8369069943b02 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 20 Mar 2025 15:34:45 +0100 Subject: [PATCH 041/130] adjust type annotations --- src/zarr/api/asynchronous.py | 2 +- src/zarr/codecs/_v2.py | 2 +- src/zarr/core/_info.py | 6 +++--- src/zarr/core/array.py | 6 ++++-- src/zarr/core/array_spec.py | 6 +++--- src/zarr/core/dtype/__init__.py | 4 ++++ src/zarr/core/dtype/wrapper.py | 4 ++-- src/zarr/core/metadata/v3.py | 9 +++++---- tests/test_array.py | 13 +++++++------ tests/test_info.py | 6 +++--- 10 files changed, 33 insertions(+), 25 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 72a12f9acb..9a9e00c972 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -990,7 +990,7 @@ async def create( chunks = shape default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype_wrapped) if filters is None: - filters = default_filters + filters = default_filters # type: ignore[assignment] if compressor is None: compressor = default_compressor elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr] diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py index a89d1f5fa4..c03e3c55fb 100644 --- a/src/zarr/codecs/_v2.py +++ b/src/zarr/codecs/_v2.py @@ -46,7 +46,7 @@ async def _decode_single( chunk = 
ensure_ndarray_like(chunk) # special case object dtype, because incorrect handling can lead to # segfaults and other bad things happening - if chunk_spec.dtype != object: + if chunk_spec.dtype.dtype_cls is not np.dtypes.ObjectDType: try: chunk = chunk.view(chunk_spec.dtype.to_dtype()) except TypeError: diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index 525b80c65f..3e605773bb 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -2,14 +2,14 @@ import dataclasses import textwrap -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Any, Literal if TYPE_CHECKING: import numcodecs.abc from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.core.common import ZarrFormat - from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import ZDType @dataclasses.dataclass(kw_only=True) @@ -80,7 +80,7 @@ class ArrayInfo: _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat - _data_type: ZDType[_BaseDType, _BaseScalar] + _data_type: ZDType[Any, Any] _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] | None = None diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7b6eb455fc..91d6954e6c 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -680,6 +680,8 @@ def _create_metadata_v3( """ Create an instance of ArrayV3Metadata. """ + filters: tuple[ArrayArrayCodec, ...] + compressors: tuple[BytesBytesCodec, ...] 
shape = parse_shapelike(shape) if codecs is None: @@ -707,7 +709,7 @@ def _create_metadata_v3( chunk_grid=chunk_grid_parsed, chunk_key_encoding=chunk_key_encoding_parsed, fill_value=fill_value_parsed, - codecs=codecs_parsed, + codecs=codecs_parsed, # type: ignore[arg-type] dimension_names=tuple(dimension_names) if dimension_names else None, attributes=attributes or {}, ) @@ -1712,7 +1714,7 @@ def _info( ) -> Any: return ArrayInfo( _zarr_format=self.metadata.zarr_format, - _data_type=self.dtype, + _data_type=self._zdtype, _shape=self.shape, _order=self.order, _shard_shape=self.shards, diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index e8e451944f..f1eac930c4 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -17,7 +17,7 @@ from zarr.core.buffer import BufferPrototype from zarr.core.common import ChunkCoords - from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import ZDType class ArrayConfigParams(TypedDict): @@ -89,7 +89,7 @@ def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig: @dataclass(frozen=True) class ArraySpec: shape: ChunkCoords - dtype: ZDType[_BaseDType, _BaseScalar] + dtype: ZDType[Any, Any] fill_value: Any config: ArrayConfig prototype: BufferPrototype @@ -97,7 +97,7 @@ class ArraySpec: def __init__( self, shape: ChunkCoords, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[Any, Any], fill_value: Any, config: ArrayConfig, prototype: BufferPrototype, diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 4e594f8796..fc494030f1 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -84,7 +84,11 @@ data_type_registry.register(dtype._zarr_v3_name, dtype) +# TODO: find a better name for this function def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[_BaseDType, _BaseScalar]: + """ + Get a data type wrapper (an instance of ``ZDType``) from a native data type, 
e.g. a numpy dtype. + """ data_type_registry.lazy_load() if not isinstance(dtype, np.dtype): if dtype in (str, "str"): diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 8707c3cda0..3409fa7ca4 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -22,7 +22,7 @@ TDType = TypeVar("TDType", bound=_BaseDType) -@dataclass(frozen=True, kw_only=True) +@dataclass(frozen=True, kw_only=True, slots=True) class ZDType(Generic[TDType, TScalar], ABC): """ Abstract base class for wrapping native array data types, e.g. numpy dtypes @@ -62,7 +62,7 @@ def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[TDType]: return type(dtype) is cls.dtype_cls @classmethod - def from_dtype(cls: type[Self], dtype: TDType) -> Self: + def from_dtype(cls: type[Self], dtype: _BaseDType) -> Self: """ Wrap a dtype object. diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 117bb3c573..889946c6ea 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -109,7 +109,8 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: ZDType[_BaseDType, _BaseSc # we need to have special codecs if we are decoding vlen strings or bytestrings # TODO: use codec ID instead of class name codec_class_name = abc.__class__.__name__ - if isinstance(dtype, VariableLengthString) and not codec_class_name == "VLenUTF8Codec": + # TODO: Fix typing here + if isinstance(dtype, VariableLengthString) and not codec_class_name == "VLenUTF8Codec": # type: ignore[unreachable] raise ValueError( f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`." 
) @@ -407,11 +408,11 @@ def to_dict(self) -> dict[str, JSON]: # TODO: replace the `to_dict` / `from_dict` on the `Metadata`` class with # to_json, from_json, and have ZDType inherit from `Metadata` - # until then, we have this hack here + # until then, we have this hack here, which relies on the fact that to_dict will pass through + # any non-`Metadata` fields as-is. dtype_meta = out_dict["data_type"] - if isinstance(dtype_meta, ZDType): - out_dict["data_type"] = dtype_meta.to_json(zarr_format=3) + out_dict["data_type"] = dtype_meta.to_json(zarr_format=3) # type: ignore[unreachable] return out_dict diff --git a/tests/test_array.py b/tests/test_array.py index aa61860fa1..5ed5ba06b7 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -39,6 +39,7 @@ from zarr.core.chunk_grids import _auto_partition from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.dtype._numpy import Float64 from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv from zarr.core.sync import sync @@ -448,7 +449,7 @@ def test_info_v2(self, chunks: tuple[int, int], shards: tuple[int, int] | None) result = arr.info expected = ArrayInfo( _zarr_format=2, - _data_type=arr.dtype, + _data_type=arr._async_array._zdtype, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=None, @@ -465,7 +466,7 @@ def test_info_v3(self, chunks: tuple[int, int], shards: tuple[int, int] | None) result = arr.info expected = ArrayInfo( _zarr_format=3, - _data_type=arr.dtype, + _data_type=arr._async_array._zdtype, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, @@ -490,7 +491,7 @@ def test_info_complete(self, chunks: tuple[int, int], shards: tuple[int, int] | result = arr.info_complete() expected = ArrayInfo( _zarr_format=3, - _data_type=arr.dtype, + _data_type=arr._async_array._zdtype, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, @@ -525,7 +526,7 @@ async def test_info_v2_async( result = 
arr.info expected = ArrayInfo( _zarr_format=2, - _data_type=np.dtype("float64"), + _data_type=Float64(), _shape=(8, 8), _chunk_shape=(2, 2), _shard_shape=None, @@ -550,7 +551,7 @@ async def test_info_v3_async( result = arr.info expected = ArrayInfo( _zarr_format=3, - _data_type=arr.dtype, + _data_type=arr._zdtype, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, @@ -577,7 +578,7 @@ async def test_info_complete_async( result = await arr.info_complete() expected = ArrayInfo( _zarr_format=3, - _data_type=arr.dtype, + _data_type=arr._zdtype, _shape=(8, 8), _chunk_shape=chunks, _shard_shape=shards, diff --git a/tests/test_info.py b/tests/test_info.py index db0fd0ef76..8662be0ab0 100644 --- a/tests/test_info.py +++ b/tests/test_info.py @@ -1,11 +1,11 @@ import textwrap -import numpy as np import pytest from zarr.codecs.bytes import BytesCodec from zarr.core._info import ArrayInfo, GroupInfo, human_readable_size from zarr.core.common import ZarrFormat +from zarr.core.dtype._numpy import Int32 ZARR_FORMATS = [2, 3] @@ -53,7 +53,7 @@ def test_group_info_complete(zarr_format: ZarrFormat) -> None: def test_array_info(zarr_format: ZarrFormat) -> None: info = ArrayInfo( _zarr_format=zarr_format, - _data_type=np.dtype("int32"), + _data_type=Int32(), _shape=(100, 100), _chunk_shape=(10, 100), _order="C", @@ -91,7 +91,7 @@ def test_array_info_complete( ) = bytes_things info = ArrayInfo( _zarr_format=zarr_format, - _data_type=np.dtype("int32"), + _data_type=Int32(), _shape=(100, 100), _chunk_shape=(10, 100), _order="C", From 7a619e0be900e8979e58c3bedfa980a104920dd0 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 20 Mar 2025 15:35:08 +0100 Subject: [PATCH 042/130] fix info tests to use zdtype --- tests/test_info.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_info.py b/tests/test_info.py index 8662be0ab0..2e465b6a21 100644 --- a/tests/test_info.py +++ b/tests/test_info.py @@ -65,7 +65,7 @@ def test_array_info(zarr_format: 
ZarrFormat) -> None: assert result == textwrap.dedent(f"""\ Type : Array Zarr format : {zarr_format} - Data type : int32 + Data type : Int32(endianness='little') Shape : (100, 100) Chunk shape : (10, 100) Order : C @@ -106,7 +106,7 @@ def test_array_info_complete( assert result == textwrap.dedent(f"""\ Type : Array Zarr format : {zarr_format} - Data type : int32 + Data type : Int32(endianness='little') Shape : (100, 100) Chunk shape : (10, 100) Order : C From ea2d0bf6ccedd57297e570d28a0fe6706de43604 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 20 Mar 2025 16:31:05 +0100 Subject: [PATCH 043/130] remove dead code and add code coverage exemption to zarr format checks --- src/zarr/core/dtype/_numpy.py | 116 +++++++++++++++++----------------- src/zarr/core/dtype/common.py | 2 +- src/zarr/core/metadata/v3.py | 59 ----------------- 3 files changed, 59 insertions(+), 118 deletions(-) diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index f8ebc807d3..55bd86a61d 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -77,14 +77,14 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["b return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -162,14 +162,14 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["i return data in cls._zarr_v2_names elif zarr_format == 3: return data 
== cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -247,14 +247,14 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["u return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -337,14 +337,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -352,7 
+352,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.int16: return self.to_dtype().type(0) @@ -393,14 +393,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -408,7 +408,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.uint16: return self.to_dtype().type(0) @@ -449,14 +449,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got 
{zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -464,7 +464,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.int32: return self.to_dtype().type(0) @@ -505,14 +505,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -520,7 +520,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.uint32: return self.to_dtype().type(0) @@ -561,14 +561,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def 
to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -576,7 +576,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.int64: return self.to_dtype().type(0) @@ -617,14 +617,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -632,7 +632,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.uint64: return self.to_dtype().type(0) @@ -673,14 +673,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return 
data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -688,7 +688,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.float16: return self.to_dtype().type(0) @@ -729,14 +729,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -744,7 +744,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") 
# pragma: no cover def default_value(self) -> np.float32: return self.to_dtype().type(0) @@ -785,14 +785,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -800,7 +800,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.float64: return self.to_dtype().type(0) @@ -841,14 +841,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -856,7 +856,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: 
ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.complex64: return self.to_dtype().type(0) @@ -899,14 +899,14 @@ def check_json( return data in cls._zarr_v2_names elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> str: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -914,7 +914,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.complex128: return self.to_dtype().type(0) @@ -962,7 +962,7 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: and "length_bits" in data["configuration"] and isinstance(data["configuration"]["length_bits"], int) ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> JSON: if zarr_format == 2: @@ -972,7 +972,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: "name": self._zarr_v3_name, "configuration": {"length_bits": self.length * self.item_size_bits}, } - raise 
ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -980,7 +980,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls(length=data["configuration"]["length_bits"] // cls.item_size_bits) # type: ignore[arg-type, index, call-overload, operator] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.bytes_: return np.bytes_(b"") @@ -1025,14 +1025,14 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: and isinstance(data["name"], str) and (re.match(r"^r\d+$", data["name"]) is not None) ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> JSON: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return {"name": f"r{self.length * self.item_size_bits}"} - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -1040,7 +1040,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls(length=int(data["name"][1:]) // cls.item_size_bits) # type: ignore[arg-type, index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def 
check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[Any]]: @@ -1113,7 +1113,7 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: and "length_bits" in data["configuration"] and isinstance(data["configuration"]["length_bits"], int) ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> JSON: if zarr_format == 2: @@ -1123,7 +1123,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: "name": self._zarr_v3_name, "configuration": {"length_bits": self.length * self.item_size_bits}, } - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -1131,7 +1131,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls(length=data["configuration"]["length_bits"] // cls.item_size_bits) # type: ignore[arg-type, index, call-overload, operator] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.str_: return np.str_("") @@ -1174,7 +1174,7 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: return data == "|O" elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> JSON: if zarr_format == 2: @@ -1185,7 +1185,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: return "|O" elif zarr_format == 3: return 
self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -1227,14 +1227,14 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: return data == "|O" elif zarr_format == 3: return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> JSON: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -1306,7 +1306,7 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: and "unit" in data["configuration"] and data["configuration"]["unit"] in get_args(DateUnit) + get_args(TimeUnit) ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.datetime64: return np.datetime64("NaT") @@ -1316,7 +1316,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: return self.to_dtype().str elif zarr_format == 3: return {"name": self._zarr_v3_name, "configuration": {"unit": self.unit}} - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -1324,7 +1324,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return 
cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls(unit=data["configuration"]["unit"]) # type: ignore[arg-type, index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: if check_json_int(data): @@ -1391,7 +1391,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: base_dict = {"name": self._zarr_v3_name} base_dict["configuration"] = {"fields": fields} # type: ignore[assignment] return cast("JSON", base_dict) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def check_json( @@ -1416,7 +1416,7 @@ def check_json( and isinstance(data["configuration"], dict) and "fields" in data["configuration"] ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: @@ -1441,7 +1441,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: for f_name, f_dtype in meta_fields ) return cls(fields=fields) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") def to_dtype(self) -> np.dtypes.VoidDType[int]: diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 4e24d64ad9..78dc6bdacd 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -530,4 +530,4 @@ def datetime_from_json(data: int, unit: DateUnit | TimeUnit) -> np.datetime64: np.datetime64 The datetime64 value. 
""" - return cast(np.datetime64, np.int64(data).view(f"datetime64[{unit}]")) + return cast("np.datetime64", np.int64(data).view(f"datetime64[{unit}]")) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 889946c6ea..ead05b5e44 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -23,10 +23,8 @@ import json from collections.abc import Iterable from dataclasses import dataclass, field, replace -from enum import Enum from typing import Any, Literal -import numcodecs.abc import numpy as np from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec @@ -169,60 +167,6 @@ def __init__( default=default, ) - def default(self, o: object) -> Any: - if isinstance(o, np.dtype): - return str(o) - if np.isscalar(o): - out: Any - if hasattr(o, "dtype") and o.dtype.kind == "M" and hasattr(o, "view"): - # https://github.com/zarr-developers/zarr-python/issues/2119 - # `.item()` on a datetime type might or might not return an - # integer, depending on the value. 
- # Explicitly cast to an int first, and then grab .item() - out = o.view("i8").item() - else: - # convert numpy scalar to python type, and pass - # python types through - out = getattr(o, "item", lambda: o)() - if isinstance(out, complex): - # python complex types are not JSON serializable, so we use the - # serialization defined in the zarr v3 spec - return _replace_special_floats([out.real, out.imag]) - elif np.isnan(out): - return "NaN" - elif np.isinf(out): - return "Infinity" if out > 0 else "-Infinity" - return out - elif isinstance(o, Enum): - return o.name - # this serializes numcodecs compressors - # todo: implement to_dict for codecs - elif isinstance(o, numcodecs.abc.Codec): - config: dict[str, Any] = o.get_config() - return config - else: - return super().default(o) - - -def _replace_special_floats(obj: object) -> Any: - """Helper function to replace NaN/Inf/-Inf values with special strings - - Note: this cannot be done in the V3JsonEncoder because Python's `json.dumps` optimistically - converts NaN/Inf values to special types outside of the encoding step. 
- """ - if isinstance(obj, float): - if np.isnan(obj): - return "NaN" - elif np.isinf(obj): - return "Infinity" if obj > 0 else "-Infinity" - elif isinstance(obj, dict): - # Recursively replace in dictionaries - return {k: _replace_special_floats(v) for k, v in obj.items()} - elif isinstance(obj, list): - # Recursively replace in lists - return [_replace_special_floats(item) for item in obj] - return obj - class ArrayV3MetadataDict(TypedDict): """ @@ -264,9 +208,6 @@ def __init__( Because the class is a frozen dataclass, we set attributes using object.__setattr__ """ - # TODO: remove this - if not isinstance(data_type, ZDType): - raise TypeError shape_parsed = parse_shapelike(shape) chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) From 042c9e5e2b7d43172067376c4099d7a1ce4d7c08 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 20 Mar 2025 17:50:53 +0100 Subject: [PATCH 044/130] fix: add special check for resolving int32 on windows --- src/zarr/core/dtype/_numpy.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index 55bd86a61d..241626e6ac 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -429,6 +429,15 @@ class Int32(ZDType[np.dtypes.Int32DType, np.int32]): _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", " Self: + # We override the base implementation to address a windows-specific, pre-numpy 2 issue where + # ``np.dtype('i')`` is an instance of ``np.dtypes.IntDType`` that acts like `int32` instead of ``np.dtype('int32')`` + if dtype == np.dtypes.Int32DType(): + return cls._from_dtype_unsafe(np.dtypes.Int32DType().newbyteorder(dtype.byteorder)) + else: + return super().from_dtype(dtype) + @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.Int32DType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) From def5eb24976a335f13d18c1ae36a094b7e588729 Mon Sep 17 
00:00:00 2001 From: Davis Vann Bennett Date: Thu, 20 Mar 2025 22:26:05 +0100 Subject: [PATCH 045/130] add dtype entry point test --- src/zarr/core/dtype/registry.py | 3 + .../entry_points.txt | 2 + tests/package_with_entrypoint/__init__.py | 19 ++++++ tests/{test_metadata => }/test_dtype.py | 66 ++++++++++++++----- 4 files changed, 74 insertions(+), 16 deletions(-) rename tests/{test_metadata => }/test_dtype.py (84%) diff --git a/src/zarr/core/dtype/registry.py b/src/zarr/core/dtype/registry.py index 0d07ab2b9d..4ad2158f96 100644 --- a/src/zarr/core/dtype/registry.py +++ b/src/zarr/core/dtype/registry.py @@ -12,6 +12,9 @@ from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar +# This class is different from the other registry classes, which inherit from +# dict. IMO it's simpler to just do a dataclass. But long-term we should +# have just 1 registry class in use. @dataclass(frozen=True, kw_only=True) class DataTypeRegistry: contents: dict[str, type[ZDType[_BaseDType, _BaseScalar]]] = field( diff --git a/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt b/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt index eee724c912..7eb0eb7c86 100644 --- a/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +++ b/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt @@ -12,3 +12,5 @@ another_buffer = package_with_entrypoint:TestEntrypointGroup.Buffer another_ndbuffer = package_with_entrypoint:TestEntrypointGroup.NDBuffer [zarr.codec_pipeline] another_pipeline = package_with_entrypoint:TestEntrypointGroup.Pipeline +[zarr.data_type] +new_data_type = package_with_entrypoint:TestDataType \ No newline at end of file diff --git a/tests/package_with_entrypoint/__init__.py b/tests/package_with_entrypoint/__init__.py index b818adf8ea..eed2ac43e5 100644 --- a/tests/package_with_entrypoint/__init__.py +++ b/tests/package_with_entrypoint/__init__.py @@ -1,4 +1,5 @@ from collections.abc import Iterable +from typing import Any, Literal, 
Self from numpy import ndarray @@ -8,6 +9,7 @@ from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import BytesLike +from zarr.core.dtype import Bool class TestEntrypointCodec(ArrayBytesCodec): @@ -64,3 +66,20 @@ class NDBuffer(zarr.core.buffer.NDBuffer): class Pipeline(CodecPipeline): pass + + +class TestDataType(Bool): + """ + This is a "data type" that serializes to "test" + """ + + _zarr_v3_name = "test" + + @classmethod + def from_json(cls, data: Any, zarr_format: Literal[2, 3]) -> Self: + if data == cls._zarr_v3_name: + return cls() + raise ValueError + + def to_json(self, zarr_format): + return self._zarr_v3_name diff --git a/tests/test_metadata/test_dtype.py b/tests/test_dtype.py similarity index 84% rename from tests/test_metadata/test_dtype.py rename to tests/test_dtype.py index db575ee16a..f690e6ce26 100644 --- a/tests/test_metadata/test_dtype.py +++ b/tests/test_dtype.py @@ -1,9 +1,16 @@ from __future__ import annotations +import os import re +import sys from typing import TYPE_CHECKING, Any, get_args +import zarr +from zarr.core.config import config + if TYPE_CHECKING: + from collections.abc import Generator + from zarr.core.common import ZarrFormat from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar @@ -15,6 +22,7 @@ VariableLengthString, ZDType, data_type_registry, + get_data_type_from_json, ) from zarr.core.dtype._numpy import ( Bool, @@ -47,6 +55,7 @@ def data_type_registry_fixture() -> DataTypeRegistry: _NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") +VLEN_STRING_DTYPE: np.dtypes.StringDType | np.dtypes.ObjectDType if _NUMPY_SUPPORTS_VLEN_STRING: VLEN_STRING_DTYPE = np.dtypes.StringDType() VLEN_STRING_CODE = "T" @@ -79,7 +88,9 @@ def data_type_registry_fixture() -> DataTypeRegistry: (DateTime64, "datetime64[s]"), ], ) -def test_wrap(wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], np_dtype: np.dtype | str) -> None: +def test_wrap( + wrapper_cls: 
type[ZDType[_BaseDType, _BaseScalar]], np_dtype: np.dtype[np.generic] | str +) -> None: """ Test that the wrapper class has the correct dtype class bound to the dtype_cls variable Test that the ``wrap`` method produces an instance of the wrapper class @@ -90,14 +101,14 @@ def test_wrap(wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], np_dtype: np.d wrapped = wrapper_cls.from_dtype(dt) with pytest.raises(DataTypeValidationError, match="Invalid dtype"): - wrapper_cls.from_dtype("not a dtype") + wrapper_cls.from_dtype("not a dtype") # type: ignore[arg-type] assert isinstance(wrapped, wrapper_cls) assert wrapped.to_dtype() == dt @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) -def test_dict_serialization(wrapper_cls: DTYPE, zarr_format: ZarrFormat) -> None: +def test_dict_serialization(wrapper_cls: Any, zarr_format: ZarrFormat) -> None: if issubclass(wrapper_cls, Structured): instance = wrapper_cls(fields=((("a", Bool()),))) else: @@ -127,16 +138,14 @@ def test_dict_serialization(wrapper_cls: DTYPE, zarr_format: ZarrFormat) -> None (FixedLengthBytes(length=3), np.void(b"\x00\x00\x00")), (FixedLengthUnicode(length=3), np.str_("")), ( - Structured(fields=(("a", Float64()), ("b", Int8()))), + Structured(fields=(("a", Float64()), ("b", Int8()))), # type: ignore[arg-type] np.array([0], dtype=[("a", np.float64), ("b", np.int8)])[0], ), (VariableLengthString(), ""), (DateTime64(unit="s"), np.datetime64("NaT")), ], ) -def test_default_value( - wrapper: type[ZDType[_BaseDType, _BaseScalar]], expected_default: Any -) -> None: +def test_default_value(wrapper: ZDType[Any, Any], expected_default: Any) -> None: """ Test that the default_value method is correctly set for each dtype wrapper. 
""" @@ -171,7 +180,7 @@ def test_default_value( ], ) def test_to_json_value_v2( - wrapper: type[ZDType[_BaseDType, _BaseScalar]], input_value: Any, expected_json: Any + wrapper: ZDType[_BaseDType, _BaseScalar], input_value: Any, expected_json: Any ) -> None: """ Test the to_json_value method for each dtype wrapper for zarr v2 @@ -204,7 +213,7 @@ def test_to_json_value_v2( ], ) def test_from_json_value( - wrapper: type[ZDType[_BaseDType, _BaseScalar]], json_value: Any, expected_value: Any + wrapper: ZDType[_BaseDType, _BaseScalar], json_value: Any, expected_value: Any ) -> None: """ Test the from_json_value method for each dtype wrapper. @@ -218,7 +227,7 @@ def test_register(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that registering a dtype in a data type registry works. """ - data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) + data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) # type: ignore[arg-type] assert data_type_registry_fixture.get(Bool._zarr_v3_name) == Bool assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), Bool) @@ -227,13 +236,13 @@ def test_override(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that registering a new dtype with the same name works (overriding the previous one). 
""" - data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) + data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) # type: ignore[arg-type] class NewBool(Bool): def default_value(self) -> np.bool_: return np.True_ - data_type_registry_fixture.register(NewBool._zarr_v3_name, NewBool) + data_type_registry_fixture.register(NewBool._zarr_v3_name, NewBool) # type: ignore[arg-type] assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), NewBool) @staticmethod @@ -268,13 +277,13 @@ def test_unregistered_dtype(data_type_registry_fixture: DataTypeRegistry) -> Non @staticmethod @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) def test_registered_dtypes( - wrapper_cls: ZDType[_BaseDType, _BaseScalar], zarr_format: ZarrFormat + wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], zarr_format: ZarrFormat ) -> None: """ Test that the registered dtypes can be retrieved from the registry. """ if issubclass(wrapper_cls, Structured): - instance = wrapper_cls(fields=((("a", Bool()),))) + instance = wrapper_cls(fields=((("a", Bool()),))) # type: ignore[misc] else: instance = wrapper_cls() @@ -289,7 +298,7 @@ def test_registered_dtypes( @staticmethod @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) def test_match_dtype_unique( - wrapper_cls: ZDType[_BaseDType, _BaseScalar], + wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], data_type_registry_fixture: DataTypeRegistry, zarr_format: ZarrFormat, ) -> None: @@ -303,7 +312,7 @@ def test_match_dtype_unique( data_type_registry_fixture.register(_cls._zarr_v3_name, _cls) if issubclass(wrapper_cls, Structured): - instance = wrapper_cls(fields=((("a", Bool()),))) + instance = wrapper_cls(fields=((("a", Bool()),))) # type: ignore[misc] else: instance = wrapper_cls() dtype_instance = instance.to_dtype() @@ -316,3 +325,28 @@ def test_match_dtype_unique( msg = f"No data type wrapper found that matches {instance_dict}" with pytest.raises(ValueError, match=re.escape(msg)): 
data_type_registry_fixture.match_json(instance_dict, zarr_format=zarr_format) + + +# this is copied from the registry tests -- we should deduplicate +here = os.path.abspath(os.path.dirname(__file__)) + + +@pytest.fixture +def set_path() -> Generator[None, None, None]: + sys.path.append(here) + zarr.registry._collect_entrypoints() + yield + sys.path.remove(here) + registries = zarr.registry._collect_entrypoints() + for registry in registries: + registry.lazy_load_list.clear() + config.reset() + + +@pytest.mark.usefixtures("set_path") +def test_entrypoint_codec(zarr_format: ZarrFormat) -> None: + from package_with_entrypoint import TestDataType + + instance = TestDataType() + dtype_json = instance.to_json(zarr_format=zarr_format) + assert get_data_type_from_json(dtype_json, zarr_format=zarr_format) == instance From 1b7273b719b8801cbda5300d857a618496587dce Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 21 Mar 2025 11:50:17 +0100 Subject: [PATCH 046/130] remove default parameters for parametric dtypes; add mixin classes for numpy dtypes; define zdtypelike --- src/zarr/api/synchronous.py | 5 ++- src/zarr/core/array.py | 36 +++++++++-------- src/zarr/core/dtype/__init__.py | 26 ++++++------ src/zarr/core/dtype/_numpy.py | 71 ++++++++++++++++++--------------- src/zarr/core/dtype/common.py | 7 ++-- 5 files changed, 76 insertions(+), 69 deletions(-) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 9424ae1fde..79a5c47d71 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -37,6 +37,7 @@ ShapeLike, ZarrFormat, ) + from zarr.core.dtype import ZDTypeLike from zarr.storage import StoreLike __all__ = [ @@ -748,7 +749,7 @@ def create_array( *, name: str | None = None, shape: ShapeLike | None = None, - dtype: npt.DTypeLike | None = None, + dtype: ZDTypeLike | None = None, data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: ChunkCoords | Literal["auto"] = "auto", shards: ShardsLike | None = None, @@ -778,7 
+779,7 @@ def create_array( at the root of the store. shape : ChunkCoords, optional Shape of the array. Can be ``None`` if ``data`` is provided. - dtype : npt.DTypeLike, optional + dtype : ZDTypeLike, optional Data type of the array. Can be ``None`` if ``data`` is provided. data : np.ndarray, optional Array-like data to use for initializing the array. If this parameter is provided, the diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 91d6954e6c..6c34c0d351 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -23,7 +23,6 @@ import numcodecs import numcodecs.abc import numpy as np -import numpy.typing as npt from typing_extensions import deprecated from zarr._compat import _deprecate_positional_args @@ -67,6 +66,7 @@ from zarr.core.config import config as zarr_config from zarr.core.dtype import ( ZDType, + ZDTypeLike, parse_data_type, ) from zarr.core.indexing import ( @@ -122,6 +122,8 @@ from collections.abc import Iterator, Sequence from typing import Self + import numpy.typing as npt + from zarr.abc.codec import CodecPipeline from zarr.codecs.sharding import ShardingCodecIndexLocation from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar @@ -295,7 +297,7 @@ async def create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike, + dtype: ZDTypeLike, zarr_format: Literal[2], fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -319,7 +321,7 @@ async def create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike, + dtype: ZDTypeLike, zarr_format: Literal[3], fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -347,7 +349,7 @@ async def create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike, + dtype: ZDTypeLike, zarr_format: Literal[3] = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -375,7 +377,7 @@ async def create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike, + dtype: ZDTypeLike, zarr_format: ZarrFormat, fill_value: 
Any | None = None, attributes: dict[str, JSON] | None = None, @@ -410,7 +412,7 @@ async def create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike, + dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -446,7 +448,7 @@ async def create( The store where the array will be created. shape : ShapeLike The shape of the array. - dtype : npt.DTypeLike + dtype : ZDTypeLike The data type of the array. zarr_format : ZarrFormat, optional The Zarr format version (default is 3). @@ -551,7 +553,7 @@ async def _create( *, # v2 and v3 shape: ShapeLike, - dtype: npt.DTypeLike | ZDType[_BaseDType, _BaseScalar], + dtype: ZDTypeLike | ZDType[_BaseDType, _BaseScalar], zarr_format: ZarrFormat = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -1746,7 +1748,7 @@ def create( *, # v2 and v3 shape: ChunkCoords, - dtype: npt.DTypeLike, + dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -1781,7 +1783,7 @@ def create( The array store that has already been initialized. shape : ChunkCoords The shape of the array. - dtype : npt.DTypeLike + dtype : ZDTypeLike The data type of the array. chunk_shape : ChunkCoords, optional The shape of the Array's chunks. @@ -1875,7 +1877,7 @@ def _create( *, # v2 and v3 shape: ChunkCoords, - dtype: npt.DTypeLike, + dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -3817,7 +3819,7 @@ async def init_array( *, store_path: StorePath, shape: ShapeLike, - dtype: npt.DTypeLike, + dtype: ZDTypeLike, chunks: ChunkCoords | Literal["auto"] = "auto", shards: ShardsLike | None = None, filters: FiltersLike = "auto", @@ -3840,7 +3842,7 @@ async def init_array( StorePath instance. The path attribute is the name of the array to initialize. shape : ChunkCoords Shape of the array. 
- dtype : npt.DTypeLike + dtype : ZDTypeLike Data type of the array. chunks : ChunkCoords, optional Chunk shape of the array. @@ -4028,7 +4030,7 @@ async def create_array( *, name: str | None = None, shape: ShapeLike | None = None, - dtype: npt.DTypeLike | None = None, + dtype: ZDTypeLike | None = None, data: np.ndarray[Any, np.dtype[Any]] | None = None, chunks: ChunkCoords | Literal["auto"] = "auto", shards: ShardsLike | None = None, @@ -4057,7 +4059,7 @@ async def create_array( at the root of the store. shape : ChunkCoords, optional Shape of the array. Can be ``None`` if ``data`` is provided. - dtype : npt.DTypeLike | None + dtype : ZDTypeLike | None Data type of the array. Can be ``None`` if ``data`` is provided. data : Array-like data to use for initializing the array. If this parameter is provided, the ``shape`` and ``dtype`` parameters must be identical to ``data.shape`` and ``data.dtype``, @@ -4401,8 +4403,8 @@ def _parse_data_params( *, data: np.ndarray[Any, np.dtype[Any]] | None, shape: ShapeLike | None, - dtype: npt.DTypeLike | None, -) -> tuple[np.ndarray[Any, np.dtype[Any]] | None, ShapeLike, npt.DTypeLike]: + dtype: ZDTypeLike | None, +) -> tuple[np.ndarray[Any, np.dtype[Any]] | None, ShapeLike, ZDTypeLike]: """ Ensure an array-like ``data`` parameter is consistent with the ``dtype`` and ``shape`` parameters. 
diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index fc494030f1..021b6b48e2 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -1,19 +1,16 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, get_args - -import numpy as np - -from zarr.core.dtype._numpy import _NUMPY_SUPPORTS_VLEN_STRING -from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar +from typing import TYPE_CHECKING, Any, TypeAlias, get_args if TYPE_CHECKING: - import numpy.typing as npt - - from zarr.core.common import JSON, ZarrFormat + from zarr.core.common import ZarrFormat +import numpy as np +import numpy.typing as npt +from zarr.core.common import JSON from zarr.core.dtype._numpy import ( + _NUMPY_SUPPORTS_VLEN_STRING, Bool, Complex64, Complex128, @@ -36,7 +33,7 @@ VariableLengthString, ) from zarr.core.dtype.registry import DataTypeRegistry -from zarr.core.dtype.wrapper import ZDType +from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar __all__ = [ "Complex64", @@ -80,6 +77,8 @@ | DateTime64 ) +ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[Any, Any] | dict[str, JSON] + for dtype in get_args(DTYPE): data_type_registry.register(dtype._zarr_v3_name, dtype) @@ -112,9 +111,10 @@ def get_data_type_from_json( return data_type_registry.match_json(dtype, zarr_format=zarr_format) -def parse_data_type( - dtype: npt.DTypeLike | ZDType[Any, Any] | dict[str, JSON], zarr_format: ZarrFormat -) -> ZDType[Any, Any]: +def parse_data_type(dtype: ZDTypeLike, zarr_format: ZarrFormat) -> ZDType[Any, Any]: + """ + Interpret the input as a ZDType instance. 
+ """ if isinstance(dtype, ZDType): return dtype elif isinstance(dtype, dict): diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index 241626e6ac..4094403c3f 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -44,7 +44,25 @@ EndiannessNumpy = Literal[">", "<", "|", "="] -@dataclass(frozen=True, kw_only=True) +@dataclass(frozen=True) +class HasEndianness: + """ + This is a mix-in class for data types with an endianness attribute + """ + + endianness: Endianness | None = "little" + + +@dataclass(frozen=True) +class HasLength: + """ + This is a mix-in class for data types with a length attribute + """ + + length: int + + +@dataclass(frozen=True, kw_only=True, slots=True) class Bool(ZDType[np.dtypes.BoolDType, np.bool_]): """ Wrapper for numpy boolean dtype. @@ -311,11 +329,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint8: @dataclass(frozen=True, kw_only=True) -class Int16(ZDType[np.dtypes.Int16DType, np.int16]): +class Int16(ZDType[np.dtypes.Int16DType, np.int16], HasEndianness): dtype_cls = np.dtypes.Int16DType _zarr_v3_name = "int16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i2", " Self: @@ -367,11 +384,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int16: @dataclass(frozen=True, kw_only=True) -class UInt16(ZDType[np.dtypes.UInt16DType, np.uint16]): +class UInt16(ZDType[np.dtypes.UInt16DType, np.uint16], HasEndianness): dtype_cls = np.dtypes.UInt16DType _zarr_v3_name = "uint16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u2", " Self: @@ -423,16 +439,18 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint16: @dataclass(frozen=True, kw_only=True) -class Int32(ZDType[np.dtypes.Int32DType, np.int32]): +class Int32(ZDType[np.dtypes.Int32DType, np.int32], HasEndianness): dtype_cls = np.dtypes.Int32DType _zarr_v3_name = "int32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", " Self: # We override the base 
implementation to address a windows-specific, pre-numpy 2 issue where # ``np.dtype('i')`` is an instance of ``np.dtypes.IntDType`` that acts like `int32` instead of ``np.dtype('int32')`` + # In this case, ``type(np.dtype('i')) == np.dtypes.Int32DType`` will evaluate to ``True``, + # despite the two classes being different. Thus we will create an instance of `cls` with the + # latter dtype, after pulling in the byte order of the input if dtype == np.dtypes.Int32DType(): return cls._from_dtype_unsafe(np.dtypes.Int32DType().newbyteorder(dtype.byteorder)) else: @@ -488,11 +506,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int32: @dataclass(frozen=True, kw_only=True) -class UInt32(ZDType[np.dtypes.UInt32DType, np.uint32]): +class UInt32(ZDType[np.dtypes.UInt32DType, np.uint32], HasEndianness): dtype_cls = np.dtypes.UInt32DType _zarr_v3_name = "uint32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u4", " Self: @@ -544,11 +561,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint32: @dataclass(frozen=True, kw_only=True) -class Int64(ZDType[np.dtypes.Int64DType, np.int64]): +class Int64(ZDType[np.dtypes.Int64DType, np.int64], HasEndianness): dtype_cls = np.dtypes.Int64DType _zarr_v3_name = "int64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i8", " Self: @@ -600,11 +616,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int64: @dataclass(frozen=True, kw_only=True) -class UInt64(ZDType[np.dtypes.UInt64DType, np.uint64]): +class UInt64(ZDType[np.dtypes.UInt64DType, np.uint64], HasEndianness): dtype_cls = np.dtypes.UInt64DType _zarr_v3_name = "uint64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u8", " Self: @@ -656,11 +671,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint64: @dataclass(frozen=True, kw_only=True) -class Float16(ZDType[np.dtypes.Float16DType, np.float16]): +class Float16(ZDType[np.dtypes.Float16DType, np.float16], HasEndianness): 
dtype_cls = np.dtypes.Float16DType _zarr_v3_name = "float16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f2", " Self: @@ -712,11 +726,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float16: @dataclass(frozen=True, kw_only=True) -class Float32(ZDType[np.dtypes.Float32DType, np.float32]): +class Float32(ZDType[np.dtypes.Float32DType, np.float32], HasEndianness): dtype_cls = np.dtypes.Float32DType _zarr_v3_name = "float32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f4", " Self: @@ -768,11 +781,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float32: @dataclass(frozen=True, kw_only=True) -class Float64(ZDType[np.dtypes.Float64DType, np.float64]): +class Float64(ZDType[np.dtypes.Float64DType, np.float64], HasEndianness): dtype_cls = np.dtypes.Float64DType _zarr_v3_name = "float64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f8", " Self: @@ -824,11 +836,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float64: @dataclass(frozen=True, kw_only=True) -class Complex64(ZDType[np.dtypes.Complex64DType, np.complex64]): +class Complex64(ZDType[np.dtypes.Complex64DType, np.complex64], HasEndianness): dtype_cls = np.dtypes.Complex64DType _zarr_v3_name = "complex64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c8", " Self: @@ -882,11 +893,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex6 @dataclass(frozen=True, kw_only=True) -class Complex128(ZDType[np.dtypes.Complex128DType, np.complex128]): +class Complex128(ZDType[np.dtypes.Complex128DType, np.complex128], HasEndianness): dtype_cls = np.dtypes.Complex128DType _zarr_v3_name = "complex128" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c16", " Self: @@ -940,11 +950,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex1 @dataclass(frozen=True, kw_only=True) -class FixedLengthAscii(ZDType[np.dtypes.BytesDType[int], np.bytes_]): +class 
FixedLengthAscii(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength): dtype_cls = np.dtypes.BytesDType _zarr_v3_name = "numpy.fixed_length_ascii" item_size_bits: ClassVar[int] = 8 - length: int = 1 @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.BytesDType[int]) -> Self: @@ -1004,14 +1013,13 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: @dataclass(frozen=True, kw_only=True) -class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void]): +class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength): # np.dtypes.VoidDType is specified in an odd way in numpy # it cannot be used to create instances of the dtype # so we have to tell mypy to ignore this here dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] _zarr_v3_name = "numpy.void" item_size_bits: ClassVar[int] = 8 - length: int = 1 @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType[int]) -> Self: @@ -1085,12 +1093,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: @dataclass(frozen=True, kw_only=True) -class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_]): +class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength): dtype_cls = np.dtypes.StrDType _zarr_v3_name = "numpy.fixed_length_ucs4" item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point - endianness: Endianness | None = "little" - length: int = 1 @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.StrDType[int]) -> Self: @@ -1269,11 +1275,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: @dataclass(frozen=True, kw_only=True) -class DateTime64(ZDType[np.dtypes.DateTime64DType, np.datetime64]): +class DateTime64(ZDType[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] _zarr_v3_name = "numpy.datetime64" - unit: DateUnit | TimeUnit = "s" - endianness: Endianness | None 
= "little" + unit: DateUnit | TimeUnit @classmethod def _from_dtype_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 78dc6bdacd..106da80a61 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -325,7 +325,7 @@ def bytes_to_json(data: bytes, zarr_format: ZarrFormat) -> str: Parameters ---------- data : bytes - The structured scalar value to convert. + The bytes to store. zarr_format : ZarrFormat The zarr format version. @@ -334,9 +334,8 @@ def bytes_to_json(data: bytes, zarr_format: ZarrFormat) -> str: str The bytes encoded as ascii using the base64 alphabet. """ - if zarr_format == 2: - return base64.b64encode(data).decode("ascii") - raise NotImplementedError(f"Invalid zarr format: {zarr_format}. Expected 2.") + # TODO: decide if we are going to make this implementation zarr format-specific + return base64.b64encode(data).decode("ascii") def bytes_from_json(data: str, zarr_format: ZarrFormat) -> bytes: From 83f508ceb611598dfbaeadd4e4d66fd4b015601c Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Mon, 24 Mar 2025 11:39:12 +0100 Subject: [PATCH 047/130] Update docs/user-guide/data_types.rst Co-authored-by: Ilan Gold --- docs/user-guide/data_types.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index b964439706..36a9ea40f7 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -40,7 +40,7 @@ Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``st The ``<`` character in the data type metadata encodes the `endianness `_, or "byte order", of the data type. Following Numpy's example, in Zarr version 2 each data type has an endianness where applicable. However, Zarr version 3 data types do not store endianness information. 
-In addition to defining a representation of the data type itself (which in the example above was just a simple string ``" Date: Mon, 24 Mar 2025 14:45:27 +0100 Subject: [PATCH 048/130] refactor: use inheritance to remove boilerplate in dtype definitions --- src/zarr/core/dtype/_numpy.py | 900 ++++++++++++++------------------- src/zarr/core/dtype/common.py | 33 +- src/zarr/core/dtype/wrapper.py | 74 ++- src/zarr/core/metadata/v2.py | 6 +- tests/conftest.py | 18 +- tests/test_array.py | 37 +- tests/test_dtype.py | 105 ++-- tests/test_metadata/test_v3.py | 19 +- 8 files changed, 575 insertions(+), 617 deletions(-) diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index 4094403c3f..38597f8fee 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -11,7 +11,12 @@ ClassVar, Literal, Self, + SupportsComplex, + SupportsFloat, + SupportsIndex, + SupportsInt, TypeGuard, + TypeVar, cast, get_args, ) @@ -21,7 +26,6 @@ from zarr.core.dtype.common import ( DataTypeValidationError, Endianness, - JSONFloat, bytes_from_json, bytes_to_json, check_json_bool, @@ -29,8 +33,8 @@ check_json_float, check_json_int, check_json_str, - complex_from_json, - complex_to_json, + complex_float_from_json, + complex_float_to_json, datetime_from_json, datetime_to_json, float_from_json, @@ -42,6 +46,9 @@ from zarr.core.common import JSON, ZarrFormat EndiannessNumpy = Literal[">", "<", "|", "="] +IntLike = SupportsInt | SupportsIndex | bytes | str +FloatLike = SupportsIndex | SupportsFloat | bytes | str +ComplexLike = SupportsFloat | SupportsIndex | SupportsComplex | bytes | str | None @dataclass(frozen=True) @@ -80,7 +87,7 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_]): dtype_cls = np.dtypes.BoolDType @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.BoolDType) -> Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: return cls() def to_dtype(self: Self) -> np.dtypes.BoolDType: @@ -119,9 +126,9 @@ def default_value(self) -> 
np.bool_: """ return np.False_ - def to_json_value(self, data: np.bool_, zarr_format: ZarrFormat) -> bool: + def to_json_value(self, data: object, zarr_format: ZarrFormat) -> bool: """ - Convert a boolean value to JSON-serializable format. + Convert a scalar to a python bool. Parameters ---------- @@ -154,188 +161,175 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: The numpy boolean scalar. """ if check_json_bool(data): - return np.bool_(data) + return self._cast_value_unsafe(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") + def check_value(self, data: object) -> bool: + # Anything can become a bool + return True -@dataclass(frozen=True, kw_only=True) -class Int8(ZDType[np.dtypes.Int8DType, np.int8]): - dtype_cls = np.dtypes.Int8DType - _zarr_v3_name = "int8" - _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|i1",) + def cast_value(self, value: object) -> np.bool_: + return self._cast_value_unsafe(value) - @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.Int8DType) -> Self: - return cls() + def _cast_value_unsafe(self, value: object) -> np.bool_: + return np.bool_(value) - def to_dtype(self: Self) -> np.dtypes.Int8DType: - return self.dtype_cls() - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["int8", "|i1"]]: +_NumpyIntDType = ( + np.dtypes.Int8DType + | np.dtypes.Int16DType + | np.dtypes.Int32DType + | np.dtypes.Int64DType + | np.dtypes.UInt8DType + | np.dtypes.UInt16DType + | np.dtypes.UInt32DType + | np.dtypes.UInt64DType +) +_NumpyIntScalar = ( + np.int8 | np.int16 | np.int32 | np.int64 | np.uint8 | np.uint16 | np.uint32 | np.uint64 +) +TIntDType_co = TypeVar("TIntDType_co", bound=_NumpyIntDType, covariant=True) +TIntScalar_co = TypeVar("TIntScalar_co", bound=_NumpyIntScalar, covariant=True) + + +@dataclass(frozen=True) +class BaseInt(ZDType[TIntDType_co, TIntScalar_co]): + # This attribute holds the possible zarr v2 JSON names for the data type + 
_zarr_v2_names: ClassVar[tuple[str, ...]] + + def to_json(self, zarr_format: ZarrFormat) -> str: """ - Check that the input is a valid JSON representation of a 8-bit integer. + Convert the wrapped data type to a JSON-serializable form. + + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + str + The JSON-serializable representation of the wrapped data type """ if zarr_format == 2: - return data in cls._zarr_v2_names + return self.to_dtype().str elif zarr_format == 3: - return data == cls._zarr_v3_name + return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_json(self, zarr_format: ZarrFormat) -> str: + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of this data type. + """ if zarr_format == 2: - return self.to_dtype().str + return data in cls._zarr_v2_names elif zarr_format == 3: - return self._zarr_v3_name + return data == cls._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - return cls() + def check_value(self, value: object) -> TypeGuard[IntLike]: + return isinstance(value, IntLike) - def default_value(self) -> np.int8: + def _cast_value_unsafe(self, value: object) -> TIntScalar_co: + if self.check_value(value): + return self.to_dtype().type(value) # type: ignore[return-value] + raise TypeError(f"Invalid type: {value}. Expected a value castable to an integer.") + + def default_value(self) -> TIntScalar_co: """ - Get the default value. + Get the default value, which is 0 cast to this dtype Returns ------- - np.int8 + Int scalar The default value. 
""" - return np.int8(0) + return self._cast_value_unsafe(0) - def to_json_value(self, data: np.int8, zarr_format: ZarrFormat) -> int: + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TIntScalar_co: """ - Convert a numpy 8-bit int to JSON-serializable format. + Read a JSON-serializable value as a numpy int scalar. Parameters ---------- - data : np.int8 - The value to convert. + data : JSON + The JSON-serializable value. zarr_format : ZarrFormat The zarr format version. Returns ------- - int - The JSON-serializable form of the scalar. + TScalar_co + The numpy scalar. """ - return int(data) + if check_json_int(data): + return self._cast_value_unsafe(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int8: + def to_json_value(self, data: object, zarr_format: ZarrFormat) -> int: """ - Read a JSON-serializable value as a numpy int8 scalar. + Convert an object to JSON-serializable scalar. Parameters ---------- - data : JSON - The JSON-serializable value. + data : _BaseScalar + The value to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- - np.bool_ - The numpy boolean scalar. + int + The JSON-serializable form of the scalar. """ - if check_json_int(data): - return np.int8(data) - raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + return int(self.cast_value(data)) @dataclass(frozen=True, kw_only=True) -class UInt8(ZDType[np.dtypes.UInt8DType, np.uint8]): - dtype_cls = np.dtypes.UInt8DType - _zarr_v3_name = "uint8" - _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|u1",) +class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): + dtype_cls = np.dtypes.Int8DType + _zarr_v3_name = "int8" + _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|i1",) @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.UInt8DType) -> Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: return cls() - def to_dtype(self: Self) -> np.dtypes.UInt8DType: + def to_dtype(self: Self) -> np.dtypes.Int8DType: return self.dtype_cls() - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["uint8", "|u1"]]: - """ - Check that the input is a valid JSON representation of an unsigned 8-bit integer. - """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def to_json(self, zarr_format: ZarrFormat) -> str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() - def default_value(self) -> np.uint8: - """ - Get the default value for this data type. - - Returns - ------- - np.uint8 - The default value. - """ - return np.uint8(0) - - def to_json_value(self, data: np.uint8, zarr_format: ZarrFormat) -> int: - """ - Convert a numpy unsigned 8-bit integer to JSON-serializable format. - - Parameters - ---------- - data : np.uint8 - The value to convert. - zarr_format : ZarrFormat - The zarr format version. - Returns - ------- - int - The JSON-serializable form of the scalar. 
- """ - return int(data) +@dataclass(frozen=True, kw_only=True) +class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): + dtype_cls = np.dtypes.UInt8DType + _zarr_v3_name = "uint8" + _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|u1",) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint8: - """ - Read a JSON-serializable value as a numpy boolean scalar. + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + return cls() - Parameters - ---------- - data : JSON - The JSON-serializable value. - zarr_format : ZarrFormat - The zarr format version. + def to_dtype(self: Self) -> np.dtypes.UInt8DType: + return self.dtype_cls() - Returns - ------- - np.bool_ - The numpy boolean scalar. - """ - if check_json_int(data): - return np.uint8(data) - raise TypeError(f"Invalid type: {data}. Expected an integer.") + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() @dataclass(frozen=True, kw_only=True) -class Int16(ZDType[np.dtypes.Int16DType, np.int16], HasEndianness): +class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): dtype_cls = np.dtypes.Int16DType _zarr_v3_name = "int16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i2", " Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -343,54 +337,24 @@ def to_dtype(self) -> np.dtypes.Int16DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["int16", ">i2", " str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: 
if zarr_format == 2: + # This ensures that we get the endianness correct without annoying string parsing return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.int16: - return self.to_dtype().type(0) - - def to_json_value(self, data: np.int16, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int16: - if check_json_int(data): - return self.to_dtype().type(data) - raise TypeError(f"Invalid type: {data}. Expected an integer.") - @dataclass(frozen=True, kw_only=True) -class UInt16(ZDType[np.dtypes.UInt16DType, np.uint16], HasEndianness): +class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): dtype_cls = np.dtypes.UInt16DType _zarr_v3_name = "uint16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u2", " Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -398,26 +362,6 @@ def to_dtype(self) -> np.dtypes.UInt16DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["uint16", ">u2", " str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: @@ -426,20 +370,9 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.uint16: - return 
self.to_dtype().type(0) - - def to_json_value(self, data: np.uint16, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint16: - if check_json_int(data): - return self.to_dtype().type(data) - raise TypeError(f"Invalid type: {data}. Expected an integer.") - @dataclass(frozen=True, kw_only=True) -class Int32(ZDType[np.dtypes.Int32DType, np.int32], HasEndianness): +class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): dtype_cls = np.dtypes.Int32DType _zarr_v3_name = "int32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", " Self: return super().from_dtype(dtype) @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.Int32DType) -> Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -465,26 +398,6 @@ def to_dtype(self) -> np.dtypes.Int32DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["int32", ">i4", " str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: @@ -493,26 +406,15 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.int32: - return self.to_dtype().type(0) - - def to_json_value(self, data: np.int32, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int32: - if check_json_int(data): - return 
self.to_dtype().type(data) - raise TypeError(f"Invalid type: {data}. Expected an integer.") - @dataclass(frozen=True, kw_only=True) -class UInt32(ZDType[np.dtypes.UInt32DType, np.uint32], HasEndianness): +class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): dtype_cls = np.dtypes.UInt32DType _zarr_v3_name = "uint32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u4", " Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -520,26 +422,6 @@ def to_dtype(self) -> np.dtypes.UInt32DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["uint32", ">u4", " str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: @@ -548,26 +430,15 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.uint32: - return self.to_dtype().type(0) - - def to_json_value(self, data: np.uint32, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint32: - if check_json_int(data): - return self.to_dtype().type(data) - raise TypeError(f"Invalid type: {data}. 
Expected an integer.") - @dataclass(frozen=True, kw_only=True) -class Int64(ZDType[np.dtypes.Int64DType, np.int64], HasEndianness): +class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): dtype_cls = np.dtypes.Int64DType _zarr_v3_name = "int64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i8", " Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -575,26 +446,6 @@ def to_dtype(self) -> np.dtypes.Int64DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["int64", ">i8", " str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: @@ -603,26 +454,15 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.int64: - return self.to_dtype().type(0) - - def to_json_value(self, data: np.int64, zarr_format: ZarrFormat) -> int: - return int(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.int64: - if check_json_int(data): - return self.to_dtype().type(data) - raise TypeError(f"Invalid type: {data}. 
Expected an integer.") - @dataclass(frozen=True, kw_only=True) -class UInt64(ZDType[np.dtypes.UInt64DType, np.uint64], HasEndianness): +class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): dtype_cls = np.dtypes.UInt64DType _zarr_v3_name = "uint64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u8", " Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -630,26 +470,6 @@ def to_dtype(self) -> np.dtypes.UInt64DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["uint64", ">u8", " str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: @@ -658,47 +478,45 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.uint64: - return self.to_dtype().type(0) - - def to_json_value(self, data: np.uint64, zarr_format: ZarrFormat) -> int: - return int(data) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.uint64: - if check_json_int(data): - return self.to_dtype().type(data) - raise TypeError(f"Invalid type: {data}. 
Expected an integer.") +TFloatDType_co = TypeVar( + "TFloatDType_co", + bound=np.dtypes.Float16DType | np.dtypes.Float32DType | np.dtypes.Float64DType, + covariant=True, +) +TFloatScalar_co = TypeVar( + "TFloatScalar_co", bound=np.float16 | np.float32 | np.float64, covariant=True +) -@dataclass(frozen=True, kw_only=True) -class Float16(ZDType[np.dtypes.Float16DType, np.float16], HasEndianness): - dtype_cls = np.dtypes.Float16DType - _zarr_v3_name = "float16" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f2", " Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> np.dtypes.Float16DType: + def to_dtype(self) -> TFloatDType_co: byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) + return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["float", ">f2", " str: """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + Convert the wrapped data type to a JSON-serializable form. - def to_json(self, zarr_format: ZarrFormat) -> str: + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. 
+ + Returns + ------- + str + The JSON-serializable representation of the wrapped data type + """ if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: @@ -713,39 +531,10 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.float16: - return self.to_dtype().type(0) - - def to_json_value(self, data: np.float16, zarr_format: ZarrFormat) -> JSONFloat: - return float_to_json(data, zarr_format) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float16: - if check_json_float(data, zarr_format=zarr_format): - return self.to_dtype().type(float_from_json(data, zarr_format)) - raise TypeError(f"Invalid type: {data}. Expected a float.") - - -@dataclass(frozen=True, kw_only=True) -class Float32(ZDType[np.dtypes.Float32DType, np.float32], HasEndianness): - dtype_cls = np.dtypes.Float32DType - _zarr_v3_name = "float32" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f4", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> np.dtypes.Float32DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["float32", ">f4", " TypeGuard[JSON]: """ - Check that the input is a valid JSON representation of a signed 16-bit integer. + Check that the input is a valid JSON representation of this data type. 
""" if zarr_format == 2: return data in cls._zarr_v2_names @@ -753,62 +542,122 @@ def check_json( return data == cls._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_json(self, zarr_format: ZarrFormat) -> str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def check_value(self, value: object) -> TypeGuard[FloatLike]: + return isinstance(value, FloatLike) - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def _cast_value_unsafe(self, value: object) -> TFloatScalar_co: + if self.check_value(value): + return self.to_dtype().type(value) # type: ignore[return-value] + raise TypeError(f"Invalid type: {value}. Expected a value castable to a float.") + + def default_value(self) -> TFloatScalar_co: + """ + Get the default value, which is 0 cast to this dtype + + Returns + ------- + Int scalar + The default value. + """ + return self._cast_value_unsafe(0) - def default_value(self) -> np.float32: - return self.to_dtype().type(0) + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TFloatScalar_co: + """ + Read a JSON-serializable value as a numpy float. - def to_json_value(self, data: np.float32, zarr_format: ZarrFormat) -> JSONFloat: - return float_to_json(data, zarr_format) + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float32: + Returns + ------- + TScalar_co + The numpy float. 
+ """ if check_json_float(data, zarr_format=zarr_format): - return self.to_dtype().type(float_from_json(data, zarr_format)) - raise TypeError(f"Invalid type: {data}. Expected a float.") + return self._cast_value_unsafe(float_from_json(data, zarr_format=zarr_format)) + raise TypeError( + f"Invalid type: {data}. Expected a float or a special string encoding of a float." + ) + + def to_json_value(self, data: object, zarr_format: ZarrFormat) -> float | str: + """ + Convert an object to a JSON-serializable float. + + Parameters + ---------- + data : _BaseScalar + The value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + JSON + The JSON-serializable form of the float, which is potentially a number or a string. + See the zarr specifications for details on the JSON encoding for floats. + """ + return float_to_json(self._cast_value_unsafe(data), zarr_format=zarr_format) + + +@dataclass(frozen=True, kw_only=True) +class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): + dtype_cls = np.dtypes.Float16DType + _zarr_v3_name = "float16" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f2", "f4", "f8", " Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> np.dtypes.Float64DType: + def to_dtype(self) -> TComplexDType_co: byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) + return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["float64", ">f8", " str: """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + Convert the wrapped data type to a 
JSON-serializable form. - def to_json(self, zarr_format: ZarrFormat) -> str: + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + str + The JSON-serializable representation of the wrapped data type + """ if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: @@ -823,39 +672,10 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.float64: - return self.to_dtype().type(0) - - def to_json_value(self, data: np.float64, zarr_format: ZarrFormat) -> JSONFloat: - return float_to_json(data, zarr_format) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.float64: - if check_json_float(data, zarr_format=zarr_format): - return self.to_dtype().type(float_from_json(data, zarr_format)) - raise TypeError(f"Invalid type: {data}. Expected a float.") - - -@dataclass(frozen=True, kw_only=True) -class Complex64(ZDType[np.dtypes.Complex64DType, np.complex64], HasEndianness): - dtype_cls = np.dtypes.Complex64DType - _zarr_v3_name = "complex64" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c8", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> np.dtypes.Complex64DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["complex64", ">c8", " TypeGuard[JSON]: """ - Check that the input is a valid JSON representation of a signed 16-bit integer. + Check that the input is a valid JSON representation of this data type. 
""" if zarr_format == 2: return data in cls._zarr_v2_names @@ -863,90 +683,79 @@ def check_json( return data == cls._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_json(self, zarr_format: ZarrFormat) -> str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def check_value(self, value: object) -> bool: + return isinstance(value, ComplexLike) - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def _cast_value_unsafe(self, value: object) -> TComplexScalar_co: + if self.check_value(value): + return self.to_dtype().type(value) # type: ignore[arg-type, return-value] + raise TypeError(f"Invalid type: {value}. Expected a value castable to a complex scalar.") - def default_value(self) -> np.complex64: - return self.to_dtype().type(0) + def default_value(self) -> TComplexScalar_co: + """ + Get the default value, which is 0 cast to this dtype - def to_json_value( - self, data: np.complex64, zarr_format: ZarrFormat - ) -> tuple[JSONFloat, JSONFloat]: - return complex_to_json(data, zarr_format) + Returns + ------- + Int scalar + The default value. + """ + return self._cast_value_unsafe(0) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex64: - if check_json_complex_float(data, zarr_format=zarr_format): - return complex_from_json(data, dtype=self.to_dtype(), zarr_format=zarr_format) - raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TComplexScalar_co: + """ + Read a JSON-serializable value as a numpy float. + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. -@dataclass(frozen=True, kw_only=True) -class Complex128(ZDType[np.dtypes.Complex128DType, np.complex128], HasEndianness): - dtype_cls = np.dtypes.Complex128DType - _zarr_v3_name = "complex128" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c16", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) + def to_json_value(self, data: object, zarr_format: ZarrFormat) -> JSON: + """ + Convert an object to a JSON-serializable float. - def to_dtype(self) -> np.dtypes.Complex128DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) + Parameters + ---------- + data : _BaseScalar + The value to convert. + zarr_format : ZarrFormat + The zarr format version. 
- @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[Literal["complex128", ">c16", " str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def default_value(self) -> np.complex128: - return self.to_dtype().type(0) +@dataclass(frozen=True, kw_only=True) +class Complex64(BaseComplex[np.dtypes.Complex64DType, np.complex64]): + dtype_cls = np.dtypes.Complex64DType + _zarr_v3_name = "complex64" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c8", " tuple[JSONFloat, JSONFloat]: - return complex_to_json(data, zarr_format) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.complex128: - if check_json_complex_float(data, zarr_format=zarr_format): - return complex_from_json(data, dtype=self.to_dtype(), zarr_format=zarr_format) - raise TypeError(f"Invalid type: {data}. 
Expected a complex float.") +@dataclass(frozen=True, kw_only=True) +class Complex128(BaseComplex[np.dtypes.Complex128DType, np.complex128], HasEndianness): + dtype_cls = np.dtypes.Complex128DType + _zarr_v3_name = "complex128" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c16", " Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) def to_dtype(self) -> np.dtypes.BytesDType[int]: @@ -1003,14 +812,20 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: def default_value(self) -> np.bytes_: return np.bytes_(b"") - def to_json_value(self, data: np.bytes_, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(data).decode("ascii") + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: if check_json_str(data): return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii"))) raise TypeError(f"Invalid type: {data}. 
Expected a string.") + def check_value(self, data: object) -> bool: + return isinstance(data, np.bytes_ | str | bytes) + + def _cast_value_unsafe(self, value: object) -> np.bytes_: + return self.to_dtype().type(value) + @dataclass(frozen=True, kw_only=True) class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength): @@ -1022,7 +837,7 @@ class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength): item_size_bits: ClassVar[int] = 8 @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType[int]) -> Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) def to_dtype(self) -> np.dtypes.VoidDType[int]: @@ -1083,14 +898,20 @@ def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidD def default_value(self) -> np.void: return self.to_dtype().type(("\x00" * self.length).encode("ascii")) - def to_json_value(self, data: np.void, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(data.tobytes()).decode("ascii") + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(self.cast_value(data).tobytes()).decode("ascii") def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: if check_json_str(data): return self.to_dtype().type(base64.standard_b64decode(data)) raise DataTypeValidationError(f"Invalid type: {data}. 
Expected a string.") + def check_value(self, data: object) -> bool: + return isinstance(data, np.bytes_ | str | bytes) + + def _cast_value_unsafe(self, value: object) -> np.void: + return self.to_dtype().type(value) # type: ignore[call-overload, no-any-return] + @dataclass(frozen=True, kw_only=True) class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength): @@ -1099,7 +920,7 @@ class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.StrDType[int]) -> Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls( length=dtype.itemsize // (cls.item_size_bits // 8), @@ -1151,7 +972,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: def default_value(self) -> np.str_: return np.str_("") - def to_json_value(self, data: np.str_, *, zarr_format: ZarrFormat) -> str: + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: return str(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: @@ -1159,6 +980,12 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: raise TypeError(f"Invalid type: {data}. 
Expected a string.") return self.to_dtype().type(data) + def check_value(self, data: object) -> bool: + return isinstance(data, str | np.str_ | bytes) + + def _cast_value_unsafe(self, value: object) -> np.str_: + return self.to_dtype().type(value) + _NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") @@ -1171,7 +998,7 @@ class VariableLengthString(ZDType[np.dtypes.StringDType, str]): # type: ignore[ _zarr_v3_name = "numpy.variable_length_utf8" @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.StringDType) -> Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: return cls() def to_dtype(self) -> np.dtypes.StringDType: @@ -1217,6 +1044,12 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: raise TypeError(f"Invalid type: {data}. Expected a string.") return data + def check_value(self, data: object) -> bool: + return isinstance(data, str) + + def _cast_value_unsafe(self, value: object) -> str: + return str(value) + else: @dataclass(frozen=True, kw_only=True) @@ -1225,7 +1058,7 @@ class VariableLengthString(ZDType[np.dtypes.ObjectDType, str]): # type: ignore[ _zarr_v3_name = "numpy.variable_length_utf8" @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.ObjectDType) -> Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: return cls() def to_dtype(self) -> np.dtypes.ObjectDType: @@ -1258,8 +1091,8 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: def default_value(self) -> str: return "" - def to_json_value(self, data: str, *, zarr_format: ZarrFormat) -> str: - return data + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return data # type: ignore[return-value] def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: """ @@ -1269,19 +1102,25 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: raise TypeError(f"Invalid type: {data}. 
Expected a string.") return data + def check_value(self, data: object) -> bool: + return isinstance(data, str) + + def _cast_value_unsafe(self, value: object) -> str: + return str(value) + DateUnit = Literal["Y", "M", "W", "D"] TimeUnit = Literal["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] -@dataclass(frozen=True, kw_only=True) +@dataclass(frozen=True, kw_only=True, slots=True) class DateTime64(ZDType[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] _zarr_v3_name = "numpy.datetime64" unit: DateUnit | TimeUnit @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.DateTime64DType) -> Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: unit: DateUnit | TimeUnit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] # type: ignore[assignment] if unit not in get_args(DateUnit) and unit not in get_args(TimeUnit): raise DataTypeValidationError('Invalid unit for "numpy.datetime64"') @@ -1345,8 +1184,19 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime return datetime_from_json(data, self.unit) raise TypeError(f"Invalid type: {data}. Expected an integer.") - def to_json_value(self, data: np.datetime64, *, zarr_format: ZarrFormat) -> int: - return datetime_to_json(data) + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: + return datetime_to_json(data) # type: ignore[arg-type] + + def check_value(self, data: object) -> bool: + # not sure which values we should accept for structured dtypes. 
+ try: + np.array([data], dtype=self.to_dtype()) + return True # noqa: TRY300 + except ValueError: + return False + + def _cast_value_unsafe(self, value: object) -> np.datetime64: + return self.to_dtype().type(value) # type: ignore[no-any-return, call-overload] @dataclass(frozen=True, kw_only=True) @@ -1356,9 +1206,9 @@ class Structured(ZDType[np.dtypes.VoidDType[int], np.void]): fields: tuple[tuple[str, ZDType[_BaseDType, _BaseScalar]], ...] def default_value(self) -> np.void: - return self.cast_value(0) + return self._cast_value_unsafe(0) - def cast_value(self, value: object) -> np.void: + def _cast_value_unsafe(self, value: object) -> np.void: return cast("np.void", np.array([value], dtype=self.to_dtype())[0]) @classmethod @@ -1379,7 +1229,7 @@ def check_dtype(cls, dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: return super().check_dtype(dtype) and dtype.fields is not None @classmethod - def _from_dtype_unsafe(cls, dtype: np.dtypes.VoidDType[int]) -> Self: + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: from zarr.core.dtype import get_data_type_from_native_dtype fields: list[tuple[str, ZDType[Any, Any]]] = [] @@ -1464,8 +1314,16 @@ def to_dtype(self) -> np.dtypes.VoidDType[int]: np.dtype([(key, dtype.to_dtype()) for (key, dtype) in self.fields]), ) - def to_json_value(self, data: np.generic, *, zarr_format: ZarrFormat) -> str: - return bytes_to_json(data.tobytes(), zarr_format) + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return bytes_to_json(self.cast_value(data).tobytes(), zarr_format) + + def check_value(self, data: object) -> bool: + # not sure which values we should accept for structured dtypes. 
+ try: + np.array([data], dtype=self.to_dtype()) + return True # noqa: TRY300 + except ValueError: + return False def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: if not check_json_str(data): diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 106da80a61..a53d2e7866 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -31,7 +31,7 @@ def check_json_bool(data: JSON) -> TypeGuard[bool]: Bool True if the data is a boolean, False otherwise. """ - return bool(isinstance(data, bool)) + return isinstance(data, bool) def check_json_str(data: JSON) -> TypeGuard[str]: @@ -293,7 +293,7 @@ def complex_to_json_v3(data: complex | np.complexfloating[Any, Any]) -> tuple[JS return float_to_json_v3(data.real), float_to_json_v3(data.imag) -def complex_to_json( +def complex_float_to_json( data: complex | np.complexfloating[Any, Any], zarr_format: ZarrFormat ) -> tuple[JSONFloat, JSONFloat]: """ @@ -424,9 +424,7 @@ def float_from_json(data: JSONFloat, zarr_format: ZarrFormat) -> float: return float_from_json_v3(data) -def complex_from_json_v2( - data: tuple[JSONFloat, JSONFloat], dtype: np.dtypes.Complex64DType | np.dtypes.Complex128DType -) -> np.complexfloating[Any, Any]: +def complex_float_from_json_v2(data: tuple[JSONFloat, JSONFloat]) -> complex: """ Convert a JSON complex float to a complex number (v2). @@ -434,20 +432,16 @@ def complex_from_json_v2( ---------- data : tuple[JSONFloat, JSONFloat] The JSON complex float to convert. - dtype : Any - The numpy dtype. Returns ------- np.complexfloating The complex number. 
""" - return dtype.type(complex(float_from_json_v2(data[0]), float_from_json_v2(data[1]))) + return complex(float_from_json_v2(data[0]), float_from_json_v2(data[1])) -def complex_from_json_v3( - data: tuple[JSONFloat, JSONFloat], dtype: np.dtypes.Complex64DType | np.dtypes.Complex128DType -) -> np.complexfloating[Any, Any]: +def complex_float_from_json_v3(data: tuple[JSONFloat, JSONFloat]) -> complex: """ Convert a JSON complex float to a complex number (v3). @@ -455,20 +449,16 @@ def complex_from_json_v3( ---------- data : tuple[JSONFloat, JSONFloat] The JSON complex float to convert. - dtype : Any - The numpy dtype. Returns ------- np.complexfloating The complex number. """ - return dtype.type(complex(float_from_json_v3(data[0]), float_from_json_v3(data[1]))) + return complex(float_from_json_v3(data[0]), float_from_json_v3(data[1])) -def complex_from_json( - data: tuple[JSONFloat, JSONFloat], dtype: Any, zarr_format: ZarrFormat -) -> np.complexfloating[Any, Any]: +def complex_float_from_json(data: tuple[JSONFloat, JSONFloat], zarr_format: ZarrFormat) -> complex: """ Convert a JSON complex float to a complex number based on zarr format. @@ -476,8 +466,6 @@ def complex_from_json( ---------- data : tuple[JSONFloat, JSONFloat] The JSON complex float to convert. - dtype : Any - The numpy dtype. zarr_format : ZarrFormat The zarr format version. @@ -487,12 +475,9 @@ def complex_from_json( The complex number. """ if zarr_format == 2: - return complex_from_json_v2(data, dtype) + return complex_float_from_json_v2(data) else: - if check_json_complex_float_v3(data): - return complex_from_json_v3(data, dtype) - else: - raise TypeError(f"Invalid type: {data}. Expected a sequence of two numbers.") + return complex_float_from_json_v3(data) raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 3409fa7ca4..74e7bf79e1 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -17,13 +17,14 @@ # This is the bound for the dtypes that we support. If we support non-numpy dtypes, # then this bound will need to be widened. _BaseDType = np.dtype[np.generic] -TScalar = TypeVar("TScalar", bound=_BaseScalar) +TScalar_co = TypeVar("TScalar_co", bound=_BaseScalar, covariant=True) # TODO: figure out an interface or protocol that non-numpy dtypes can use -TDType = TypeVar("TDType", bound=_BaseDType) +# These two type parameters are covariant because we want isinstance(ZDType[Subclass](), ZDType[BaseDType]) to be True +TDType_co = TypeVar("TDType_co", bound=_BaseDType, covariant=True) @dataclass(frozen=True, kw_only=True, slots=True) -class ZDType(Generic[TDType, TScalar], ABC): +class ZDType(Generic[TDType_co, TScalar_co], ABC): """ Abstract base class for wrapping native array data types, e.g. numpy dtypes @@ -41,11 +42,11 @@ class ZDType(Generic[TDType, TScalar], ABC): # mypy currently disallows class variables to contain type parameters # but it seems OK for us to use it here: # https://github.com/python/typing/discussions/1424#discussioncomment-7989934 - dtype_cls: ClassVar[type[TDType]] # type: ignore[misc] + dtype_cls: ClassVar[type[TDType_co]] # type: ignore[misc] _zarr_v3_name: ClassVar[str] @classmethod - def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[TDType]: + def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[TDType_co]: """ Check that a data type matches the dtype_cls class attribute. Used as a type guard. @@ -89,7 +90,7 @@ def from_dtype(cls: type[Self], dtype: _BaseDType) -> Self: @classmethod @abstractmethod - def _from_dtype_unsafe(cls: type[Self], dtype: TDType) -> Self: + def _from_dtype_unsafe(cls: type[Self], dtype: _BaseDType) -> Self: """ Wrap a native dtype without checking. 
@@ -106,7 +107,7 @@ def _from_dtype_unsafe(cls: type[Self], dtype: TDType) -> Self: ... @abstractmethod - def to_dtype(self: Self) -> TDType: + def to_dtype(self: Self) -> TDType_co: """ Return an instance of the wrapped dtype. @@ -117,8 +118,61 @@ def to_dtype(self: Self) -> TDType: """ ... + def cast_value(self, data: object) -> TScalar_co: + """ + Cast a value to the wrapped scalar type. The type is first checked for compatibility. If it's + incompatible with the associated scalar type, a ``TypeError`` will be raised. + + Parameters + ---------- + data : TScalar + The scalar value to cast. + + Returns + ------- + TScalar + The cast value. + """ + if self.check_value(data): + return self._cast_value_unsafe(data) + raise TypeError(f"Invalid value: {data}") + + @abstractmethod + def check_value(self, data: object) -> bool: + """ + Check that a value is a valid value for the wrapped data type. + + Parameters + ---------- + data : object + A value to check. + + Returns + ------- + Bool + True if the value is valid, False otherwise. + """ + ... + + @abstractmethod + def _cast_value_unsafe(self, data: object) -> TScalar_co: + """ + Cast a value to the wrapped data type. This method should not perform any input validation. + + Parameters + ---------- + data : TScalar + The scalar value to cast. + + Returns + ------- + TScalar + The cast value. + """ + ... + @abstractmethod - def default_value(self) -> TScalar: + def default_value(self) -> TScalar_co: """ Get the default value for the wrapped data type. This is a method, rather than an attribute, because the default value for some data types may depend on parameters that are not known @@ -216,7 +270,7 @@ def _from_json_unsafe(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> S ... @abstractmethod - def to_json_value(self, data: TScalar, *, zarr_format: ZarrFormat) -> JSON: + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> JSON: """ Convert a single value to JSON-serializable format. 
@@ -235,7 +289,7 @@ def to_json_value(self, data: TScalar, *, zarr_format: ZarrFormat) -> JSON: ... @abstractmethod - def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar: + def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar_co: """ Read a JSON-serializable value as a scalar. diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index d26ca52353..f3f738eea7 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -9,7 +9,7 @@ from zarr.abc.metadata import Metadata from zarr.core.dtype import get_data_type_from_native_dtype -from zarr.core.dtype.wrapper import TDType, TScalar, ZDType, _BaseDType, _BaseScalar +from zarr.core.dtype.wrapper import TDType_co, TScalar_co, ZDType, _BaseDType, _BaseScalar if TYPE_CHECKING: from typing import Any, Literal, Self @@ -58,7 +58,7 @@ def __init__( self, *, shape: ChunkCoords, - dtype: ZDType[TDType, TScalar], + dtype: ZDType[TDType_co, TScalar_co], chunks: ChunkCoords, fill_value: Any, order: MemoryOrder, @@ -176,7 +176,7 @@ def to_dict(self) -> dict[str, JSON]: zarray_dict["filters"] = new_filters if self.fill_value is not None: - fill_value = self.dtype.to_json_value(self.fill_value, zarr_format=2) # type: ignore[arg-type] + fill_value = self.dtype.to_json_value(self.fill_value, zarr_format=2) zarray_dict["fill_value"] = fill_value zarray_dict["dtype"] = self.dtype.to_json(zarr_format=2) diff --git a/tests/conftest.py b/tests/conftest.py index b2c106f2e2..ac419ae2f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,7 +20,8 @@ from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition from zarr.core.common import JSON, parse_shapelike from zarr.core.config import config as zarr_config -from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.dtype import data_type_registry, get_data_type_from_native_dtype +from zarr.core.dtype._numpy import DateTime64, HasLength, Structured from 
zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync @@ -36,6 +37,7 @@ from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike, ShardsLike from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike from zarr.core.common import ChunkCoords, MemoryOrder, ShapeLike, ZarrFormat + from zarr.core.dtype.wrapper import ZDType async def parse_store( @@ -404,3 +406,17 @@ def meta_from_array( chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, ) + + +# Generate a collection of zdtype instances for use in testing. +zdtype_examples: tuple[ZDType[Any, Any], ...] = () +for wrapper_cls in data_type_registry.contents.values(): + # The Structured dtype has to be constructed with some actual fields + if wrapper_cls is Structured: + zdtype_examples += (wrapper_cls.from_dtype(np.dtype([("a", np.float64), ("b", np.int8)])),) + elif issubclass(wrapper_cls, HasLength): + zdtype_examples += (wrapper_cls(length=1),) + elif issubclass(wrapper_cls, DateTime64): + zdtype_examples += (wrapper_cls(unit="s"),) + else: + zdtype_examples += (wrapper_cls(),) diff --git a/tests/test_array.py b/tests/test_array.py index 5ed5ba06b7..6a562f1d07 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -40,12 +40,15 @@ from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.dtype._numpy import Float64 +from zarr.core.dtype.wrapper import ZDType from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.storage import LocalStore, MemoryStore, StorePath +from .conftest import zdtype_examples + if TYPE_CHECKING: from zarr.core.array_spec import ArrayConfigLike from zarr.core.metadata.v2 import ArrayV2Metadata @@ -177,32 +180,42 @@ def 
test_array_name_properties_with_group( @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("specifiy_fill_value", [True, False]) -@pytest.mark.parametrize("dtype_str", ["bool", "uint8", "complex64"]) -def test_array_v3_fill_value_default( - store: MemoryStore, specifiy_fill_value: bool, dtype_str: str +@pytest.mark.parametrize( + "zdtype", zdtype_examples, ids=tuple(str(type(v)) for v in zdtype_examples) +) +def test_array_fill_value_default( + store: MemoryStore, specifiy_fill_value: bool, zdtype: ZDType[Any, Any] ) -> None: """ Test that creating an array with the fill_value parameter set to None, or unspecified, results in the expected fill_value attribute of the array, i.e. 0 cast to the array's dtype. """ shape = (10,) - default_fill_value = 0 if specifiy_fill_value: arr = zarr.create_array( store=store, shape=shape, - dtype=dtype_str, + dtype=zdtype, zarr_format=3, chunks=shape, fill_value=None, ) else: - arr = zarr.create_array( - store=store, shape=shape, dtype=dtype_str, zarr_format=3, chunks=shape - ) + arr = zarr.create_array(store=store, shape=shape, dtype=zdtype, zarr_format=3, chunks=shape) + expected_fill_value = zdtype.default_value() + if isinstance(expected_fill_value, np.datetime64 | np.timedelta64): + if np.isnat(expected_fill_value): + assert np.isnat(arr.fill_value) + elif isinstance(expected_fill_value, np.floating | np.complexfloating): + if np.isnan(expected_fill_value): + assert np.isnan(arr.fill_value) + else: + assert arr.fill_value == expected_fill_value + # A simpler check would be to ensure that arr.fill_value.dtype == arr.dtype + # But for some numpy data types (namely, U), scalars might not have length. An empty string + # scalar from a `>U4` array would have dtype `>U`, and arr.fill_value.dtype == arr.dtype will fail. 
- assert arr.fill_value == np.dtype(dtype_str).type(default_fill_value) - assert arr.fill_value.dtype == arr.dtype + assert type(arr.fill_value) is type(np.array([arr.fill_value], dtype=arr.dtype)[0]) @pytest.mark.parametrize("store", ["memory"], indirect=True) @@ -1004,7 +1017,7 @@ async def test_v3_chunk_encoding( filters=filters, compressors=compressors, serializer="auto", - dtype=arr.metadata.data_type, # type: ignore[union-attr] + dtype=arr._zdtype, ) assert arr.filters == filters_expected assert arr.compressors == compressors_expected @@ -1369,4 +1382,4 @@ async def test_sharding_coordinate_selection() -> None: shards=(2, 4, 4), ) arr[:] = np.arange(2 * 3 * 4).reshape((2, 3, 4)) - assert (arr[1, [0, 1]] == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() # type: ignore[index] + assert (arr[1, [0, 1]] == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() diff --git a/tests/test_dtype.py b/tests/test_dtype.py index f690e6ce26..122949664c 100644 --- a/tests/test_dtype.py +++ b/tests/test_dtype.py @@ -8,6 +8,8 @@ import zarr from zarr.core.config import config +from .conftest import zdtype_examples + if TYPE_CHECKING: from collections.abc import Generator @@ -64,6 +66,17 @@ def data_type_registry_fixture() -> DataTypeRegistry: VLEN_STRING_CODE = "O" +def test_zdtype_examples() -> None: + """ + Test that all the elements of the exported union type DTYPE have an example in the variable + zdtype_examples, which we use for testing. + + If this test fails, that means that either there is a data type that does not have an example, + or there is a data type that is missing from the DTYPE union type. 
+ """ + assert set(map(type, zdtype_examples)) == set(get_args(DTYPE)) + + @pytest.mark.parametrize( ("wrapper_cls", "np_dtype"), [ @@ -88,9 +101,7 @@ def data_type_registry_fixture() -> DataTypeRegistry: (DateTime64, "datetime64[s]"), ], ) -def test_wrap( - wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], np_dtype: np.dtype[np.generic] | str -) -> None: +def test_wrap(wrapper_cls: type[ZDType[Any, Any]], np_dtype: np.dtype[np.generic] | str) -> None: """ Test that the wrapper class has the correct dtype class bound to the dtype_cls variable Test that the ``wrap`` method produces an instance of the wrapper class @@ -102,19 +113,17 @@ def test_wrap( with pytest.raises(DataTypeValidationError, match="Invalid dtype"): wrapper_cls.from_dtype("not a dtype") # type: ignore[arg-type] - assert isinstance(wrapped, wrapper_cls) assert wrapped.to_dtype() == dt -@pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) -def test_dict_serialization(wrapper_cls: Any, zarr_format: ZarrFormat) -> None: - if issubclass(wrapper_cls, Structured): - instance = wrapper_cls(fields=((("a", Bool()),))) - else: - instance = wrapper_cls() - as_dict = instance.to_json(zarr_format=zarr_format) - assert wrapper_cls.from_json(as_dict, zarr_format=zarr_format) == instance +@pytest.mark.parametrize("zdtype", zdtype_examples) +def test_to_json_roundtrip(zdtype: ZDType[Any, Any], zarr_format: ZarrFormat) -> None: + """ + Test that a zdtype instance can round-trip through its JSON form + """ + as_dict = zdtype.to_json(zarr_format=zarr_format) + assert zdtype.from_json(as_dict, zarr_format=zarr_format) == zdtype @pytest.mark.parametrize( @@ -138,7 +147,7 @@ def test_dict_serialization(wrapper_cls: Any, zarr_format: ZarrFormat) -> None: (FixedLengthBytes(length=3), np.void(b"\x00\x00\x00")), (FixedLengthUnicode(length=3), np.str_("")), ( - Structured(fields=(("a", Float64()), ("b", Int8()))), # type: ignore[arg-type] + Structured(fields=(("a", Float64()), ("b", Int8()))), np.array([0], dtype=[("a", 
np.float64), ("b", np.int8)])[0], ), (VariableLengthString(), ""), @@ -188,6 +197,42 @@ def test_to_json_value_v2( assert wrapper.to_json_value(input_value, zarr_format=2) == expected_json +# NOTE! This test is currently a direct copy of the v2 version. When or if we change JSON serialization +# in a v3-specific manner, this test must be changed. +# TODO: Apply zarr-v3-specific changes to this test as needed +@pytest.mark.parametrize( + ("wrapper", "input_value", "expected_json"), + [ + (Bool(), np.bool_(True), True), + (Int8(), np.int8(42), 42), + (UInt8(), np.uint8(42), 42), + (Int16(), np.int16(42), 42), + (UInt16(), np.uint16(42), 42), + (Int32(), np.int32(42), 42), + (UInt32(), np.uint32(42), 42), + (Int64(), np.int64(42), 42), + (UInt64(), np.uint64(42), 42), + (Float16(), np.float16(42.0), 42.0), + (Float32(), np.float32(42.0), 42.0), + (Float64(), np.float64(42.0), 42.0), + (Complex64(), np.complex64(42.0 + 1.0j), (42.0, 1.0)), + (Complex128(), np.complex128(42.0 + 1.0j), (42.0, 1.0)), + (FixedLengthAscii(length=4), np.bytes_(b"test"), "dGVzdA=="), + (FixedLengthBytes(length=4), np.void(b"test"), "dGVzdA=="), + (FixedLengthUnicode(length=4), np.str_("test"), "test"), + (VariableLengthString(), "test", "test"), + (DateTime64(unit="s"), np.datetime64("2021-01-01T00:00:00", "s"), 1609459200), + ], +) +def test_to_json_value_v3( + wrapper: ZDType[_BaseDType, _BaseScalar], input_value: Any, expected_json: Any +) -> None: + """ + Test the to_json_value method for each dtype wrapper for zarr v3 + """ + assert wrapper.to_json_value(input_value, zarr_format=3) == expected_json + + @pytest.mark.parametrize( ("wrapper", "json_value", "expected_value"), [ @@ -227,7 +272,7 @@ def test_register(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that registering a dtype in a data type registry works. 
""" - data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) # type: ignore[arg-type] + data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) assert data_type_registry_fixture.get(Bool._zarr_v3_name) == Bool assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), Bool) @@ -236,13 +281,13 @@ def test_override(data_type_registry_fixture: DataTypeRegistry) -> None: """ Test that registering a new dtype with the same name works (overriding the previous one). """ - data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) # type: ignore[arg-type] + data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) class NewBool(Bool): def default_value(self) -> np.bool_: return np.True_ - data_type_registry_fixture.register(NewBool._zarr_v3_name, NewBool) # type: ignore[arg-type] + data_type_registry_fixture.register(NewBool._zarr_v3_name, NewBool) assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), NewBool) @staticmethod @@ -275,30 +320,26 @@ def test_unregistered_dtype(data_type_registry_fixture: DataTypeRegistry) -> Non data_type_registry_fixture.get(outside_dtype) @staticmethod - @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) + @pytest.mark.parametrize("zdtype", zdtype_examples) def test_registered_dtypes( - wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], zarr_format: ZarrFormat + zdtype: ZDType[_BaseDType, _BaseScalar], zarr_format: ZarrFormat ) -> None: """ Test that the registered dtypes can be retrieved from the registry. 
""" - if issubclass(wrapper_cls, Structured): - instance = wrapper_cls(fields=((("a", Bool()),))) # type: ignore[misc] - else: - instance = wrapper_cls() - assert data_type_registry.match_dtype(instance.to_dtype()) == instance + assert data_type_registry.match_dtype(zdtype.to_dtype()) == zdtype assert ( data_type_registry.match_json( - instance.to_json(zarr_format=zarr_format), zarr_format=zarr_format + zdtype.to_json(zarr_format=zarr_format), zarr_format=zarr_format ) - == instance + == zdtype ) @staticmethod - @pytest.mark.parametrize("wrapper_cls", get_args(DTYPE)) + @pytest.mark.parametrize("zdtype", zdtype_examples) def test_match_dtype_unique( - wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], + zdtype: ZDType[Any, Any], data_type_registry_fixture: DataTypeRegistry, zarr_format: ZarrFormat, ) -> None: @@ -308,20 +349,16 @@ def test_match_dtype_unique( fails to match anything in the registry """ for _cls in get_args(DTYPE): - if _cls is not wrapper_cls: + if _cls is not type(zdtype): data_type_registry_fixture.register(_cls._zarr_v3_name, _cls) - if issubclass(wrapper_cls, Structured): - instance = wrapper_cls(fields=((("a", Bool()),))) # type: ignore[misc] - else: - instance = wrapper_cls() - dtype_instance = instance.to_dtype() + dtype_instance = zdtype.to_dtype() msg = f"No data type wrapper found that matches dtype '{dtype_instance}'" with pytest.raises(ValueError, match=re.escape(msg)): data_type_registry_fixture.match_dtype(dtype_instance) - instance_dict = instance.to_json(zarr_format=zarr_format) + instance_dict = zdtype.to_json(zarr_format=zarr_format) msg = f"No data type wrapper found that matches {instance_dict}" with pytest.raises(ValueError, match=re.escape(msg)): data_type_registry_fixture.match_json(instance_dict, zarr_format=zarr_format) diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index bd5f9be8b6..cd30f5cf3f 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -13,7 +13,7 @@ 
from zarr.core.config import config from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.dtype._numpy import DateTime64 -from zarr.core.dtype.common import complex_from_json +from zarr.core.dtype.common import check_json_complex_float from zarr.core.group import GroupMetadata, parse_node_type from zarr.core.metadata.v3 import ( ArrayV3Metadata, @@ -28,7 +28,7 @@ from typing import Any from zarr.abc.codec import Codec - from zarr.core.common import JSON + from zarr.core.common import JSON, ZarrFormat from zarr.core.metadata.v3 import ( @@ -135,17 +135,12 @@ def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: assert dtype.to_json_value(observed, zarr_format=zarr_format) == tuple(fill_value) -@pytest.mark.parametrize("dtype_str", [*complex_dtypes]) @pytest.mark.parametrize("data", [[1.0, 0.0, 3.0], [0, 1, 3], [1]]) -def test_complex_to_json_invalid(data: object, dtype_str: str) -> None: - """ - Test that parse_fill_value(fill_value, dtype) correctly rejects sequences with length not - equal to 2 - """ - dtype_instance = get_data_type_from_native_dtype(dtype_str) - match = f"Invalid type: {data}. Expected a sequence of two numbers." - with pytest.raises(TypeError, match=re.escape(match)): - complex_from_json(data=data, dtype=dtype_instance, zarr_format=3) +def test_complex_to_json_invalid(data: object, zarr_format: ZarrFormat) -> None: + assert not check_json_complex_float(data, zarr_format=zarr_format) + # match = f"Invalid type: {data}. Expected a sequence of two numbers." 
+ # with pytest.raises(TypeError, match=re.escape(match)): + # complex_float_from_json(data=data, zarr_format=3) @pytest.mark.parametrize("fill_value", [{"foo": 10}]) From cb0a7d49614b99b36e2d29ba450739ec71c3da16 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 24 Mar 2025 17:20:37 +0100 Subject: [PATCH 049/130] update data types documentation, and expose core/dtype module to autodoc --- docs/conf.py | 5 +- docs/user-guide/data_types.rst | 203 ++++++++++----------------------- 2 files changed, 66 insertions(+), 142 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index d69309d432..8a9835e4cb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -68,7 +68,10 @@ def skip_submodules( ) -> bool: # Skip documenting zarr.codecs submodules # codecs are documented in the main zarr.codecs namespace - if what == "module" and name.startswith("zarr.codecs.") or name.startswith("zarr.core"): + # TODO: just document everything instead using this weak case-by-case logic + if what == "module" and name.startswith("zarr.core.dtype."): + skip = False + elif what == "module" and name.startswith("zarr.codecs.") or name.startswith("zarr.core"): skip = True return skip diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 36a9ea40f7..a281b349de 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -16,14 +16,14 @@ Zarr-Python supports creating arrays with Numpy data types:: Unlike Numpy arrays, Zarr arrays are designed to be persisted to storage and read by Zarr implementations in different programming languages. This means Zarr data types must be interpreted correctly when clients read an array. 
So each Zarr data type defines a procedure for -encoding / decoding that data type to / from Zarr array metadata, and also encoding / decoding **instances** of that data type to / from +encoding/decoding that data type to/from Zarr array metadata, and also encoding/decoding **instances** of that data type to/from array metadata. These serialization procedures depend on the Zarr format. Data types in Zarr version 2 ----------------------------- Version 2 of the Zarr format defined its data types relative to `Numpy's data types `_, and added a few non-Numpy data types as well. -Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``str`` attribute of that dtype: +Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``str`` attribute of that dtype:: >>> import zarr >>> import numpy as np @@ -32,158 +32,79 @@ Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``st >>> np_dtype = np.dtype('int64') >>> z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) >>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] - >>> assert dtype_meta == np_dtype.str # True + >>> assert dtype_meta == np_dtype.str # True >>> dtype_meta '`_, or "byte order", of the data type. Following Numpy's example, - in Zarr version 2 each data type has an endianness where applicable. However, Zarr version 3 data types do not store endianness information. + The ``<`` character in the data type metadata encodes the `endianness `_, or "byte order", of the data type. Following Numpy's example, + in Zarr version 2 each data type has an endianness where applicable. However, Zarr version 3 data types do not store endianness information. In addition to defining a representation of the data type itself (which in the example above was just a simple string ``"i2``; Zarr V3 represents the same data type as ``int16``. 
-* No endianness -* A data type can be encoded in metadata as a string or a ``JSON`` object with the structure ``{"name": , "configuration": {...}}`` +----------------------------- + +Zarr V3 brings several key changes to how data types are represented: + +- Zarr V3 identifies the basic data types as strings like ``int8``, ``int16``, etc. In Zarr V2 ``int8`` would represented as ``|i1``, ``int16`` would be ``>i2`` **or** ``i2')`` should be saved as ``{..., "dtype" : ">i2"}`` in Zarr V2 metadata. - - In Zarr V3 metadata, the same Numpy data type would be saved as ``{..., "data_type": "int16", "codecs": [..., {"name": "bytes", "configuration": {"endian": "big"}, ...]}`` - -* Associate a default fill value with a native data type. This is not mandated by the Zarr specifications, but it's convenient for users - to have a useful default. For numeric types like integers and floats the default can be statically set to 0, but for - parametric data types like fixed-length strings the default can only be generated after the data type has been parametrized at runtime. - -* Round-trip native scalars to the ``fill_value`` field in Zarr V2 and V3 array metadata documents. The Zarr V2 and V3 specifications - define how scalars of each data type should be stored as JSON in array metadata documents, and in principle each data type - can define this encoding separately. - -* Do all of the above for *user-defined data types*. Zarr-Python should support data types added as extensions,so we cannot - hard-code the list of data types. We need to ensure that users can easily (or easily enough) define a python object - that models their custom data type and register this object with Zarr-Python, so that the above operations all succeed for their - custom data type. - -To achieve these goals, Zarr Python uses a class called :class:`zarr.core.dtype.DTypeWrapper` to wrap native data types. 
Each data type -supported by Zarr Python is modeled by a subclass of `DTypeWrapper`, which has the following structure: - -(attribute) ``dtype_cls`` -^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``dtype_cls`` attribute is a **class variable** that is bound to a class that can produce -an instance of a native data type. For example, on the ``DTypeWrapper`` used to model the boolean -data type, the ``dtype_cls`` attribute is bound to the numpy bool data type class: ``np.dtypes.BoolDType``. -This attribute is used when we need to create an instance of the native data type, for example when -defining a Numpy array that will contain Zarr data. - -It might seem odd that ``DTypeWrapper.dtype_cls`` binds to a *class* that produces a native data type instead of an instance of that native data type -- -why not have a ``DTypeWrapper.dtype`` attribute that binds to ``np.dtypes.BoolDType()``? The reason why ``DTypeWrapper`` -doesn't wrap a concrete data type instance is because data type instances may have endianness information, but Zarr V3 -data types do not. To model Zarr V3 data types, we need endianness to be an **instance variable** which is -defined when creating an instance of the ```DTypeWrapper``. Subclasses of ``DTypeWrapper`` that model data types with -byte order semantics thus have ``endianness`` as an instance variable, and this value can be set when creating an instance of the wrapper. - - -(attribute) ``_zarr_v3_name`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``_zarr_v3_name`` attribute encodes the canonical name for a data type for Zarr V3. For many data types these names -are defined in the `Zarr V3 specification `_ For nearly all of the -data types defined in Zarr V3, this name can be used to uniquely specify a data type. The one exception is the ``r*`` data type, -which is parametrized by a number of bits, and so may take the form ``r8``, ``r16``, ... etc. 
- -(class method) ``from_dtype(cls, dtype) -> Self`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This method defines a procedure for safely converting a native dtype instance into an instance of ``DTypeWrapper``. It should perform -validation of its input to ensure that the native dtype is an instance of the ``dtype_cls`` class attribute, for example. For some -data types, additional checks are needed -- in Numpy "structured" data types and "void" data types use the same class, with different properties. -A ``DTypeWrapper`` that wraps Numpy structured data types must do additional checks to ensure that the input ``dtype`` is actually a structured data type. -If input validation succeeds, this method will call ``_from_dtype_unsafe``. - -(method) ``to_dtype(self) -> dtype`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This method produces a native data type consistent with the properties of the ``DTypeWrapper``. Together -with ``from_dtype``, this method allows round-trip conversion of a native data type in to a wrapper class and then out again. - -That is, for some ``DTypeWrapper`` class ``FooWrapper`` that wraps a native data type called ``foo``, ``FooWrapper.from_dtype(instance_of_foo).to_dtype() == instance_of_foo`` should be true. - -(method) ``to_dict(self) -> dict`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This method generates a JSON-serialiazable representation of the wrapped data type which can be stored in -Zarr metadata. - -(method) ``cast_value(self, value: object) -> scalar`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This method converts a python object to an instance of the wrapped data type. It is used for generating the default -value associated with this data type. - - -(method) ``default_value(self) -> scalar`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This method returns the default value for the wrapped data type. Zarr-Python uses this method to generate a default fill value -for an array when a user has not requested one. 
- -Why is this a method and not a static attribute? Although some data types -can have a static default value, parametrized data types like fixed-length strings or structured data types cannot. For these data types, -a default value must be calculated based on the attributes of the wrapped data type. - -(class method) ``check_dtype(cls, dtype) -> bool`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This class method checks if a native dtype is compatible with the ``DTypeWrapper`` class. It returns ``True`` -if ``dtype`` is compatible with the wrapper class, and ``False`` otherwise. For many data types, this check is as simple -as checking that ``cls.dtype_cls`` matches ``type(dtype)``, i.e. checking that the data type class wrapped -by the ``DTypeWrapper`` is the same as the class of ``dtype``. But there are some data types where this check alone is not sufficient, -in which case this method is overridden so that additional properties of ``dtype`` can be inspected and compared with -the expectations of ``cls``. - -(class method) ``from_dict(cls, dtype) -> Self`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This class method creates a ``DTypeWrapper`` from an appropriately structured dictionary. The default -implementation first checks that the dictionary has the correct structure, and then uses its data -to instantiate the ``DTypeWrapper`` instance. - -(method) ``to_dict(self) -> dict[str, JSON]`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Returns a dictionary form of the wrapped data type. This is used prior to writing array metadata. - -(class method) ``get_name(self, zarr_format: Literal[2, 3]) -> str`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This method generates a name for the wrapped data type, depending on the Zarr format. If ``zarr_format`` is -2 and the wrapped data type is a Numpy data type, then the Numpy string representation of that data type is returned. 
-If ``zarr_format`` is 3, then the Zarr V3 name for the wrapped data type is returned. For most data types -the Zarr V3 name will be stored as the ``_zarr_v3_name`` class attribute, but for parametric data types the -name must be computed at runtime based on the parameters of the data type. - - -(method) ``to_json_value(self, data: scalar, zarr_format: Literal[2, 3]) -> JSON`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This method converts a scalar instance of the data type into a JSON-serialiazable value. -For some data types like bool and integers this conversion is simple -- just return a JSON boolean -or number -- but other data types define a JSON serialization for scalars that is a bit more involved. -And this JSON serialization depends on the Zarr format. - -(method) ``from_json_value(self, data: JSON, zarr_format: Literal[2, 3]) -> scalar`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Convert a JSON-serialiazed scalar to a native scalar. This inverts the operation of ``to_json_value``. - -Using a custom data type ------------------------- - -TODO \ No newline at end of file +To abstract over these syntactical and semantic differences, Zarr-Python uses a class called `ZDType <../api/zarr/dtype/index.html#zarr.dtype.ZDType>`_ to wrap native data types (e.g., Numpy data types) and provide Zarr V2 and Zarr V3 compatibility routines. +Each data type supported by Zarr-Python is modeled by a subclass of ``ZDType``, which provides an API for the following operations: + +- Wrapping / unwrapping a native data type +- Encoding / decoding a data type to / from Zarr V2 and Zarr V3 array metadata. +- Encoding / decoding a scalar value to / from Zarr V2 and Zarr V3 array metadata. + + +Example Usage +~~~~~~~~~~~~~ + +.. 
code-block:: python + + from zarr.core.dtype.wrapper import Int8 + + # Create a ZDType instance from a native dtype + int8 = Int8.from_dtype(np.dtype('int8')) + + # Convert back to native dtype + native_dtype = int8.to_dtype() + assert native_dtype == np.dtype('int8') + + # Get the default value + default_value = int8.default_value() + assert default_value == np.int8(0) + + # Serialize to JSON + json_representation = int8.to_json(zarr_format=3) + + # Serialize a scalar value + json_value = int8.to_json_value(42, zarr_format=3) + assert json_value == 42 + + # Deserialize a scalar value + scalar_value = int8.from_json_value(42, zarr_format=3) + assert scalar_value == np.int8(42) + +Custom Data Types +~~~~~~~~~~~~~~~~~ + +Users can define custom data types by subclassing `ZDType` and implementing the required methods. +Once defined, the custom data type can be registered with Zarr-Python to enable seamless integration with the library. + + \ No newline at end of file From 9989c64114364f8a5381a7423be4cb4bfedb9461 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 24 Mar 2025 17:43:12 +0100 Subject: [PATCH 050/130] add failing endianness round-trip test --- tests/test_array.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index 6a562f1d07..ac35012fa1 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -7,7 +7,7 @@ import re import sys from itertools import accumulate -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, get_args from unittest import mock import numcodecs @@ -39,7 +39,8 @@ from zarr.core.chunk_grids import _auto_partition from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.dtype import get_data_type_from_native_dtype -from zarr.core.dtype._numpy import Float64 +from zarr.core.dtype._numpy import Float64, endianness_from_numpy_str +from zarr.core.dtype.common import Endianness from 
zarr.core.dtype.wrapper import ZDType from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv @@ -1383,3 +1384,20 @@ async def test_sharding_coordinate_selection() -> None: ) arr[:] = np.arange(2 * 3 * 4).reshape((2, 3, 4)) assert (arr[1, [0, 1]] == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("endianness", get_args(Endianness)) +def test_endianness(store: Store, zarr_format: ZarrFormat, endianness: Endianness) -> None: + """ + Test that that endianness is correctly set when creating an array. + """ + if endianness == "little": + np_dtype = " Date: Mon, 24 Mar 2025 18:28:45 +0100 Subject: [PATCH 051/130] fix endianness --- src/zarr/core/array.py | 33 +++++++++++++++++++++++++++++++ src/zarr/dtype.py | 3 +++ tests/test_array.py | 45 ++++++++++++++++++++++++++++++++---------- 3 files changed, 71 insertions(+), 10 deletions(-) create mode 100644 src/zarr/dtype.py diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 6c34c0d351..978e7d0c62 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -69,6 +69,7 @@ ZDTypeLike, parse_data_type, ) +from zarr.core.dtype._numpy import HasEndianness from zarr.core.indexing import ( BasicIndexer, BasicSelection, @@ -4246,6 +4247,24 @@ def _get_default_chunk_encoding_v3( else: serializer = zarr_config.get("array.v3_default_serializer.default") + # Modify the default serializer so that it matches the endianness of the dtype, otherwise unset the + # endian key + + # This is effective problematic for many reasons: + # - we are assuming that endianness is set by the serializer, when it could also be changed + # by any one of the filters. + # - we are assuming that the serializer has a specific configuration. A different serializer that + # alters endianness might not use the same configuration structure. + # - we are mutating a configuration dictionary. 
It would be much better to work with the codec + # api for this. + # All of these things are acceptable right now because there is only 1 serializer that affects + # endianness, but this design will not last if this situation changes. + if "endian" in serializer["configuration"]: + if isinstance(dtype, HasEndianness): + serializer["configuration"]["endian"] = dtype.endianness + else: + serializer["configuration"].pop("endian") + return ( tuple(_parse_array_array_codec(f) for f in filters), _parse_array_bytes_codec(serializer), @@ -4352,6 +4371,20 @@ def _parse_chunk_encoding_v3( out_array_bytes = default_array_bytes else: out_array_bytes = _parse_array_bytes_codec(serializer) + # check that the endianness of the requested serializer matches the dtype of the data, if applicable + if ( + isinstance(out_array_bytes, BytesCodec) + and isinstance(dtype, HasEndianness) + and ( + out_array_bytes.endian is None + or str(out_array_bytes.endian.value) != dtype.endianness + ) + ): + msg = ( + f"The endianness of the requested serializer ({out_array_bytes}) does not match the endianness of the dtype ({dtype.endianness}). " + "The endianness of the serializer and the dtype must match." + ) + raise ValueError(msg) if compressors is None: out_bytes_bytes: tuple[BytesBytesCodec, ...] 
= () diff --git a/src/zarr/dtype.py b/src/zarr/dtype.py new file mode 100644 index 0000000000..6e3789543b --- /dev/null +++ b/src/zarr/dtype.py @@ -0,0 +1,3 @@ +from zarr.core.dtype import ZDType, data_type_registry + +__all__ = ["ZDType", "data_type_registry"] diff --git a/tests/test_array.py b/tests/test_array.py index ac35012fa1..20de0d6032 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -39,11 +39,12 @@ from zarr.core.chunk_grids import _auto_partition from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.dtype import get_data_type_from_native_dtype -from zarr.core.dtype._numpy import Float64, endianness_from_numpy_str +from zarr.core.dtype._numpy import Float64, Int16, endianness_from_numpy_str from zarr.core.dtype.common import Endianness from zarr.core.dtype.wrapper import ZDType from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv +from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.storage import LocalStore, MemoryStore, StorePath @@ -53,7 +54,6 @@ if TYPE_CHECKING: from zarr.core.array_spec import ArrayConfigLike from zarr.core.metadata.v2 import ArrayV2Metadata - from zarr.core.metadata.v3 import ArrayV3Metadata @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @@ -1388,16 +1388,41 @@ async def test_sharding_coordinate_selection() -> None: @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("endianness", get_args(Endianness)) -def test_endianness(store: Store, zarr_format: ZarrFormat, endianness: Endianness) -> None: +def test_default_endianness(store: Store, zarr_format: ZarrFormat, endianness: Endianness) -> None: """ - Test that that endianness is correctly set when creating an array. 
+ Test that that endianness is correctly set when creating an array when not specifying a serializer + """ + dtype = Int16(endianness=endianness) + arr = zarr.create_array(store=store, shape=(1,), dtype=dtype, zarr_format=zarr_format) + assert endianness_from_numpy_str(arr[:].dtype.byteorder) == endianness + if zarr_format == 3: + assert isinstance(arr.metadata, ArrayV3Metadata) # mypy + assert str(arr.metadata.codecs[0].endian.value) == endianness # type: ignore[union-attr] + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("endianness", get_args(Endianness)) +def test_explicit_endianness(store: Store, endianness: Endianness) -> None: + """ + Test that that a mismatch between the bytescodec endianness and the dtype endianness is an error """ if endianness == "little": - np_dtype = " Date: Mon, 24 Mar 2025 18:38:08 +0100 Subject: [PATCH 052/130] additional check in test_explicit_endianness --- tests/test_array.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_array.py b/tests/test_array.py index 20de0d6032..f08018960f 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1426,3 +1426,19 @@ def test_explicit_endianness(store: Store, endianness: Endianness) -> None: zarr_format=3, serializer=serializer, ) + + # additional check for the case where the serializer has endian=None + none_serializer = dataclasses.replace(serializer, endian=None) + msg = ( + f"The endianness of the requested serializer ({none_serializer}) does not match the endianness of the dtype ({dtype.endianness}). " + "The endianness of the serializer and the dtype must match." 
+ ) + + with pytest.raises(ValueError, match=re.escape(msg)): + _ = zarr.create_array( + store=store, + shape=(1,), + dtype=dtype, + zarr_format=3, + serializer=none_serializer, + ) From 2bffe1a6ccd3aed4f7e1b708fd110fc9418cb9dd Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 24 Mar 2025 21:22:52 +0100 Subject: [PATCH 053/130] add failing test for round-tripping vlen strings --- src/zarr/core/array.py | 4 +- src/zarr/core/dtype/__init__.py | 7 +- src/zarr/core/dtype/_numpy.py | 2 +- src/zarr/core/dtype/common.py | 5 +- tests/test_array.py | 181 +++++++++++++++++++++----------- 5 files changed, 130 insertions(+), 69 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 978e7d0c62..cba4a49410 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -4259,11 +4259,11 @@ def _get_default_chunk_encoding_v3( # api for this. # All of these things are acceptable right now because there is only 1 serializer that affects # endianness, but this design will not last if this situation changes. - if "endian" in serializer["configuration"]: + if serializer.get("configuration") is not None: if isinstance(dtype, HasEndianness): serializer["configuration"]["endian"] = dtype.endianness else: - serializer["configuration"].pop("endian") + serializer["configuration"].pop("endian", None) return ( tuple(_parse_array_array_codec(f) for f in filters), diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 021b6b48e2..f9b1364011 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -99,7 +99,12 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[_BaseDType, # this is a valid _VoidDTypeLike check na_dtype = np.dtype([tuple(d) for d in dtype]) else: - na_dtype = np.dtype(dtype) + if dtype == "|T16": + # `|T16` is the numpy dtype str form for variable length strings. 
unfortunately + # numpy cannot create these directly from np.dtype("|T16") + na_dtype = np.dtypes.StringDType() + else: + na_dtype = np.dtype(dtype) else: na_dtype = dtype return data_type_registry.match_dtype(na_dtype) diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index 38597f8fee..cab849cf74 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -907,7 +907,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: raise DataTypeValidationError(f"Invalid type: {data}. Expected a string.") def check_value(self, data: object) -> bool: - return isinstance(data, np.bytes_ | str | bytes) + return isinstance(data, np.bytes_ | str | bytes | np.void) def _cast_value_unsafe(self, value: object) -> np.void: return self.to_dtype().type(value) # type: ignore[call-overload, no-any-return] diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index a53d2e7866..900b3fddbd 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -356,7 +356,10 @@ def bytes_from_json(data: str, zarr_format: ZarrFormat) -> bytes: """ if zarr_format == 2: return base64.b64decode(data.encode("ascii")) - raise NotImplementedError(f"Invalid zarr format: {zarr_format}. Expected 2.") + # TODO: differentiate these as needed. This is a spec question. + if zarr_format == 3: + return base64.b64decode(data.encode("ascii")) + raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") def float_from_json_v2(data: JSONFloat) -> float: diff --git a/tests/test_array.py b/tests/test_array.py index f08018960f..7b8b72f119 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -39,7 +39,13 @@ from zarr.core.chunk_grids import _auto_partition from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.dtype import get_data_type_from_native_dtype -from zarr.core.dtype._numpy import Float64, Int16, endianness_from_numpy_str +from zarr.core.dtype._numpy import ( + DateTime64, + Float64, + Int16, + Structured, + endianness_from_numpy_str, +) from zarr.core.dtype.common import Endianness from zarr.core.dtype.wrapper import ZDType from zarr.core.group import AsyncGroup @@ -936,12 +942,59 @@ def test_chunks_and_shards(store: Store) -> None: assert arr_v2.shards is None @staticmethod - @pytest.mark.parametrize( - ("dtype", "fill_value_expected"), [(" None: + @pytest.mark.parametrize("dtype", zdtype_examples) + def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None: + """ + Test that the fill value of an array is set to the default value for the dtype object + """ a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype) - assert a.fill_value == fill_value_expected + if isinstance(dtype, DateTime64) and np.isnat(a.fill_value): + assert np.isnat(dtype.default_value()) + else: + assert a.fill_value == dtype.default_value() + + @staticmethod + @pytest.mark.parametrize("dtype", zdtype_examples) + def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat) -> None: + """ + Test that the same array is produced from a ZDType instance, a numpy dtype, or a numpy string + """ + a = zarr.create_array( + store, name="a", shape=(5,), chunks=(5,), dtype=dtype, zarr_format=zarr_format + ) + b = zarr.create_array( + store, + name="b", + shape=(5,), + chunks=(5,), + dtype=dtype.to_dtype(), + zarr_format=zarr_format, + ) + assert a.dtype == b.dtype + + # Structured dtypes do not have a 
numpy string representation that uniquely identifies them + if not isinstance(dtype, Structured): + c = zarr.create_array( + store, + name="c", + shape=(5,), + chunks=(5,), + dtype=dtype.to_dtype().str, + zarr_format=zarr_format, + ) + assert a.dtype == c.dtype + + @staticmethod + @pytest.mark.parametrize("dtype", zdtype_examples) + def test_dtype_roundtrip( + dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat + ) -> None: + """ + Test that creating an array, then opening it, gets the same array. + """ + a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype, zarr_format=zarr_format) + b = zarr.open_array(store) + assert a.dtype == b.dtype @staticmethod @pytest.mark.parametrize("dtype", ["uint8", "float32", "str", "U3", "S4", "V1"]) @@ -1266,6 +1319,64 @@ async def test_name(store: Store, zarr_format: ZarrFormat, path: str | None) -> store=store, path=parent_path, mode="r", zarr_format=zarr_format ) + @staticmethod + @pytest.mark.parametrize("endianness", get_args(Endianness)) + def test_default_endianness( + store: Store, zarr_format: ZarrFormat, endianness: Endianness + ) -> None: + """ + Test that that endianness is correctly set when creating an array when not specifying a serializer + """ + dtype = Int16(endianness=endianness) + arr = zarr.create_array(store=store, shape=(1,), dtype=dtype, zarr_format=zarr_format) + assert endianness_from_numpy_str(arr[:].dtype.byteorder) == endianness + if zarr_format == 3: + assert isinstance(arr.metadata, ArrayV3Metadata) # mypy + assert str(arr.metadata.codecs[0].endian.value) == endianness # type: ignore[union-attr] + + @staticmethod + @pytest.mark.parametrize("endianness", get_args(Endianness)) + def test_explicit_endianness(store: Store, endianness: Endianness) -> None: + """ + Test that that a mismatch between the bytescodec endianness and the dtype endianness is an error + """ + if endianness == "little": + dtype = Int16(endianness="big") + else: + dtype = Int16(endianness="little") + + 
serializer = BytesCodec(endian=endianness) + + msg = ( + f"The endianness of the requested serializer ({serializer}) does not match the endianness of the dtype ({dtype.endianness}). " + "The endianness of the serializer and the dtype must match." + ) + + with pytest.raises(ValueError, match=re.escape(msg)): + _ = zarr.create_array( + store=store, + shape=(1,), + dtype=dtype, + zarr_format=3, + serializer=serializer, + ) + + # additional check for the case where the serializer has endian=None + none_serializer = dataclasses.replace(serializer, endian=None) + msg = ( + f"The endianness of the requested serializer ({none_serializer}) does not match the endianness of the dtype ({dtype.endianness}). " + "The endianness of the serializer and the dtype must match." + ) + + with pytest.raises(ValueError, match=re.escape(msg)): + _ = zarr.create_array( + store=store, + shape=(1,), + dtype=dtype, + zarr_format=3, + serializer=none_serializer, + ) + async def test_scalar_array() -> None: arr = zarr.array(1.5) @@ -1384,61 +1495,3 @@ async def test_sharding_coordinate_selection() -> None: ) arr[:] = np.arange(2 * 3 * 4).reshape((2, 3, 4)) assert (arr[1, [0, 1]] == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() - - -@pytest.mark.parametrize("store", ["memory"], indirect=True) -@pytest.mark.parametrize("endianness", get_args(Endianness)) -def test_default_endianness(store: Store, zarr_format: ZarrFormat, endianness: Endianness) -> None: - """ - Test that that endianness is correctly set when creating an array when not specifying a serializer - """ - dtype = Int16(endianness=endianness) - arr = zarr.create_array(store=store, shape=(1,), dtype=dtype, zarr_format=zarr_format) - assert endianness_from_numpy_str(arr[:].dtype.byteorder) == endianness - if zarr_format == 3: - assert isinstance(arr.metadata, ArrayV3Metadata) # mypy - assert str(arr.metadata.codecs[0].endian.value) == endianness # type: ignore[union-attr] - - -@pytest.mark.parametrize("store", ["memory"], 
indirect=True) -@pytest.mark.parametrize("endianness", get_args(Endianness)) -def test_explicit_endianness(store: Store, endianness: Endianness) -> None: - """ - Test that that a mismatch between the bytescodec endianness and the dtype endianness is an error - """ - if endianness == "little": - dtype = Int16(endianness="big") - else: - dtype = Int16(endianness="little") - - serializer = BytesCodec(endian=endianness) - - msg = ( - f"The endianness of the requested serializer ({serializer}) does not match the endianness of the dtype ({dtype.endianness}). " - "The endianness of the serializer and the dtype must match." - ) - - with pytest.raises(ValueError, match=re.escape(msg)): - _ = zarr.create_array( - store=store, - shape=(1,), - dtype=dtype, - zarr_format=3, - serializer=serializer, - ) - - # additional check for the case where the serializer has endian=None - none_serializer = dataclasses.replace(serializer, endian=None) - msg = ( - f"The endianness of the requested serializer ({none_serializer}) does not match the endianness of the dtype ({dtype.endianness}). " - "The endianness of the serializer and the dtype must match." 
- ) - - with pytest.raises(ValueError, match=re.escape(msg)): - _ = zarr.create_array( - store=store, - shape=(1,), - dtype=dtype, - zarr_format=3, - serializer=none_serializer, - ) From aa322715adcba81f90da998ff7f56ba9c379b654 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 25 Mar 2025 10:11:54 +0100 Subject: [PATCH 054/130] route object dtype arrays to vlen string dtype when numpy > 2 --- src/zarr/core/dtype/__init__.py | 12 +++++------- src/zarr/core/dtype/_numpy.py | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index f9b1364011..5483b21998 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -90,7 +90,10 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[_BaseDType, """ data_type_registry.lazy_load() if not isinstance(dtype, np.dtype): - if dtype in (str, "str"): + # TODO: This check has a lot of assumptions in it! Chiefly, we assume that the + # numpy object dtype contains variable length strings, which is not in general true + # When / if zarr python supports ragged arrays, for example, this check will fail! + if dtype in (str, "str", "|T16", "O", "|O", np.dtypes.ObjectDType()): if _NUMPY_SUPPORTS_VLEN_STRING: na_dtype = np.dtype("T") else: @@ -99,12 +102,7 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[_BaseDType, # this is a valid _VoidDTypeLike check na_dtype = np.dtype([tuple(d) for d in dtype]) else: - if dtype == "|T16": - # `|T16` is the numpy dtype str form for variable length strings. 
unfortunately - # numpy cannot create these directly from np.dtype("|T16") - na_dtype = np.dtypes.StringDType() - else: - na_dtype = np.dtype(dtype) + na_dtype = np.dtype(dtype) else: na_dtype = dtype return data_type_registry.match_dtype(na_dtype) diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index cab849cf74..7c803ce1f0 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -1051,7 +1051,7 @@ def _cast_value_unsafe(self, value: object) -> str: return str(value) else: - + # Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead. @dataclass(frozen=True, kw_only=True) class VariableLengthString(ZDType[np.dtypes.ObjectDType, str]): # type: ignore[no-redef] dtype_cls = np.dtypes.ObjectDType From 617d3f05dabeba2b8e5b654406a581965825e2b7 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 25 Mar 2025 10:57:22 +0100 Subject: [PATCH 055/130] relax endianness mismatch to a warning instead of an error --- src/zarr/core/array.py | 4 ++-- tests/test_array.py | 21 +++------------------ tests/test_codecs/test_endian.py | 2 ++ 3 files changed, 7 insertions(+), 20 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index cba4a49410..9a0d0fa83d 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -4382,9 +4382,9 @@ def _parse_chunk_encoding_v3( ): msg = ( f"The endianness of the requested serializer ({out_array_bytes}) does not match the endianness of the dtype ({dtype.endianness}). " - "The endianness of the serializer and the dtype must match." + "In this situation the serializer's endianness takes priority. To avoid this warning, ensure the endianness of the serializer matches the endianness of the dtype." ) - raise ValueError(msg) + warnings.warn(msg, UserWarning, stacklevel=2) if compressors is None: out_bytes_bytes: tuple[BytesBytesCodec, ...] 
= () diff --git a/tests/test_array.py b/tests/test_array.py index 7b8b72f119..2da7f0fa72 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1349,10 +1349,11 @@ def test_explicit_endianness(store: Store, endianness: Endianness) -> None: msg = ( f"The endianness of the requested serializer ({serializer}) does not match the endianness of the dtype ({dtype.endianness}). " - "The endianness of the serializer and the dtype must match." + "In this situation the serializer's endianness takes priority. " + "To avoid this warning, ensure the endianness of the serializer matches the endianness of the dtype." ) - with pytest.raises(ValueError, match=re.escape(msg)): + with pytest.warns(UserWarning, match=re.escape(msg)): _ = zarr.create_array( store=store, shape=(1,), @@ -1361,22 +1362,6 @@ def test_explicit_endianness(store: Store, endianness: Endianness) -> None: serializer=serializer, ) - # additional check for the case where the serializer has endian=None - none_serializer = dataclasses.replace(serializer, endian=None) - msg = ( - f"The endianness of the requested serializer ({none_serializer}) does not match the endianness of the dtype ({dtype.endianness}). " - "The endianness of the serializer and the dtype must match." 
- ) - - with pytest.raises(ValueError, match=re.escape(msg)): - _ = zarr.create_array( - store=store, - shape=(1,), - dtype=dtype, - zarr_format=3, - serializer=none_serializer, - ) - async def test_scalar_array() -> None: arr = zarr.array(1.5) diff --git a/tests/test_codecs/test_endian.py b/tests/test_codecs/test_endian.py index c0c4dd4e75..ab64afb1b8 100644 --- a/tests/test_codecs/test_endian.py +++ b/tests/test_codecs/test_endian.py @@ -11,6 +11,7 @@ from .test_codecs import _AsyncArrayProxy +@pytest.mark.filterwarnings("ignore:The endianness of the requested serializer") @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("endian", ["big", "little"]) async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: @@ -32,6 +33,7 @@ async def test_endian(store: Store, endian: Literal["big", "little"]) -> None: assert np.array_equal(data, readback_data) +@pytest.mark.filterwarnings("ignore:The endianness of the requested serializer") @pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) @pytest.mark.parametrize("dtype_input_endian", [">u2", " Date: Tue, 25 Mar 2025 12:14:56 +0100 Subject: [PATCH 056/130] use public dtype module for docs instead of special-casing the core dype module --- docs/conf.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index d29088d070..08f8318fd7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,7 +51,7 @@ issues_github_path = "zarr-developers/zarr-python" autoapi_dirs = ['../src/zarr'] -autoapi_add_toctree_entry = False +autoapi_add_toctree_entry = True autoapi_generate_api_docs = True autoapi_member_order = "groupwise" autoapi_root = "api" @@ -68,10 +68,7 @@ def skip_submodules( ) -> bool: # Skip documenting zarr.codecs submodules # codecs are documented in the main zarr.codecs namespace - # TODO: just document everything instead using this weak case-by-case logic - if what == "module" and 
name.startswith("zarr.core.dtype."): - skip = False - elif what == "module" and name.startswith("zarr.codecs.") or name.startswith("zarr.core"): + if what == "module" and name.startswith("zarr.codecs.") or name.startswith("zarr.core"): skip = True return skip From 1831f206c09ee9b7ca563a42ebdc4bb89772220a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 25 Mar 2025 12:15:08 +0100 Subject: [PATCH 057/130] use public dtype module for docs instead of special-casing the core dype module --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 08f8318fd7..9bb1c48901 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,7 +51,7 @@ issues_github_path = "zarr-developers/zarr-python" autoapi_dirs = ['../src/zarr'] -autoapi_add_toctree_entry = True +autoapi_add_toctree_entry = False autoapi_generate_api_docs = True autoapi_member_order = "groupwise" autoapi_root = "api" From a427a16192b0fd39bcaa7a16e503786775b2c3ce Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 25 Mar 2025 12:35:17 +0100 Subject: [PATCH 058/130] silence mypy error about array indexing --- tests/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_array.py b/tests/test_array.py index 2da7f0fa72..4579c8bd58 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1479,4 +1479,4 @@ async def test_sharding_coordinate_selection() -> None: shards=(2, 4, 4), ) arr[:] = np.arange(2 * 3 * 4).reshape((2, 3, 4)) - assert (arr[1, [0, 1]] == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() + assert (arr[1, [0, 1]] == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() # type: ignore[index] From 41d7e585eed8875aad5366f269881c55da9c08d0 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 25 Mar 2025 12:35:38 +0100 Subject: [PATCH 059/130] add release note --- changes/2874.feature.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changes/2874.feature.rst diff --git 
a/changes/2874.feature.rst b/changes/2874.feature.rst new file mode 100644 index 0000000000..26eda3a257 --- /dev/null +++ b/changes/2874.feature.rst @@ -0,0 +1,2 @@ +Adds zarr-specific data type classes. This replaces the direct use of numpy data types for zarr +v2 and a fixed set of string enums for zarr v3. For more on this new feature, see the `documentation `_ \ No newline at end of file From c08ffd9970ab04c43f243902b9e5a5458c8d17ab Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 25 Mar 2025 12:50:26 +0100 Subject: [PATCH 060/130] fix doctests, excluding config tests --- docs/user-guide/groups.rst | 4 ++-- docs/user-guide/performance.rst | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/user-guide/groups.rst b/docs/user-guide/groups.rst index c2a955718b..50e8a68aad 100644 --- a/docs/user-guide/groups.rst +++ b/docs/user-guide/groups.rst @@ -128,7 +128,7 @@ property. E.g.:: >>> bar.info_complete() Type : Array Zarr format : 3 - Data type : int64 + Data type : Int64(endianness='little') Shape : (1000000,) Chunk shape : (100000,) Order : C @@ -144,7 +144,7 @@ property. E.g.:: >>> baz.info Type : Array Zarr format : 3 - Data type : float32 + Data type : Float32(endianness='little') Shape : (1000, 1000) Chunk shape : (100, 100) Order : C diff --git a/docs/user-guide/performance.rst b/docs/user-guide/performance.rst index 5c7844f92c..40882fbf1f 100644 --- a/docs/user-guide/performance.rst +++ b/docs/user-guide/performance.rst @@ -91,7 +91,7 @@ To use sharding, you need to specify the ``shards`` parameter when creating the >>> z6.info Type : Array Zarr format : 3 - Data type : uint8 + Data type : UInt8() Shape : (10000, 10000, 1000) Shard shape : (1000, 1000, 1000) Chunk shape : (100, 100, 100) @@ -121,7 +121,7 @@ ratios, depending on the correlation structure within the data. 
E.g.:: >>> c.info_complete() Type : Array Zarr format : 3 - Data type : int32 + Data type : Int32(endianness='little') Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : C @@ -140,7 +140,7 @@ ratios, depending on the correlation structure within the data. E.g.:: >>> f.info_complete() Type : Array Zarr format : 3 - Data type : int32 + Data type : Int32(endianness='little') Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : F From 778d740c3b47665c6197eaa2e6ffe1c5557f77d5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 26 Mar 2025 10:17:28 +0100 Subject: [PATCH 061/130] revert addition of linkage between dtype endianness and bytes codec endianness --- src/zarr/core/array.py | 39 ++++++--------------------------------- tests/test_array.py | 33 +-------------------------------- 2 files changed, 7 insertions(+), 65 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 9a0d0fa83d..2a63e07f27 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -69,7 +69,6 @@ ZDTypeLike, parse_data_type, ) -from zarr.core.dtype._numpy import HasEndianness from zarr.core.indexing import ( BasicIndexer, BasicSelection, @@ -4247,24 +4246,6 @@ def _get_default_chunk_encoding_v3( else: serializer = zarr_config.get("array.v3_default_serializer.default") - # Modify the default serializer so that it matches the endianness of the dtype, otherwise unset the - # endian key - - # This is effective problematic for many reasons: - # - we are assuming that endianness is set by the serializer, when it could also be changed - # by any one of the filters. - # - we are assuming that the serializer has a specific configuration. A different serializer that - # alters endianness might not use the same configuration structure. - # - we are mutating a configuration dictionary. It would be much better to work with the codec - # api for this. 
- # All of these things are acceptable right now because there is only 1 serializer that affects - # endianness, but this design will not last if this situation changes. - if serializer.get("configuration") is not None: - if isinstance(dtype, HasEndianness): - serializer["configuration"]["endian"] = dtype.endianness - else: - serializer["configuration"].pop("endian", None) - return ( tuple(_parse_array_array_codec(f) for f in filters), _parse_array_bytes_codec(serializer), @@ -4370,21 +4351,10 @@ def _parse_chunk_encoding_v3( if serializer == "auto": out_array_bytes = default_array_bytes else: + # TODO: ensure that the serializer is compatible with the ndarray produced by the + # array-array codecs. For example, if a sequence of array-array codecs produces an + # array with a single-byte data type, then the serializer should not specify endiannesss. out_array_bytes = _parse_array_bytes_codec(serializer) - # check that the endianness of the requested serializer matches the dtype of the data, if applicable - if ( - isinstance(out_array_bytes, BytesCodec) - and isinstance(dtype, HasEndianness) - and ( - out_array_bytes.endian is None - or str(out_array_bytes.endian.value) != dtype.endianness - ) - ): - msg = ( - f"The endianness of the requested serializer ({out_array_bytes}) does not match the endianness of the dtype ({dtype.endianness}). " - "In this situation the serializer's endianness takes priority. To avoid this warning, ensure the endianness of the serializer matches the endianness of the dtype." - ) - warnings.warn(msg, UserWarning, stacklevel=2) if compressors is None: out_bytes_bytes: tuple[BytesBytesCodec, ...] = () @@ -4404,6 +4374,9 @@ def _parse_chunk_encoding_v3( # TODO: refactor so that the config only contains the name of the codec, and we use the dtype # to create the codec instance, instead of storing a dict representation of a full codec. + # TODO: ensure that the serializer is compatible with the ndarray produced by the + # array-array codecs. 
For example, if a sequence of array-array codecs produces an + # array with a single-byte data type, then the serializer should not specify endiannesss. if isinstance(out_array_bytes, BytesCodec) and dtype.to_dtype().itemsize == 1: # The default endianness in the bytescodec might not be None, so we need to replace it out_array_bytes = replace(out_array_bytes, endian=None) diff --git a/tests/test_array.py b/tests/test_array.py index 4579c8bd58..ae53a3d3d6 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -50,7 +50,6 @@ from zarr.core.dtype.wrapper import ZDType from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv -from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.storage import LocalStore, MemoryStore, StorePath @@ -60,6 +59,7 @@ if TYPE_CHECKING: from zarr.core.array_spec import ArrayConfigLike from zarr.core.metadata.v2 import ArrayV2Metadata + from zarr.core.metadata.v3 import ArrayV3Metadata @pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) @@ -1330,37 +1330,6 @@ def test_default_endianness( dtype = Int16(endianness=endianness) arr = zarr.create_array(store=store, shape=(1,), dtype=dtype, zarr_format=zarr_format) assert endianness_from_numpy_str(arr[:].dtype.byteorder) == endianness - if zarr_format == 3: - assert isinstance(arr.metadata, ArrayV3Metadata) # mypy - assert str(arr.metadata.codecs[0].endian.value) == endianness # type: ignore[union-attr] - - @staticmethod - @pytest.mark.parametrize("endianness", get_args(Endianness)) - def test_explicit_endianness(store: Store, endianness: Endianness) -> None: - """ - Test that that a mismatch between the bytescodec endianness and the dtype endianness is an error - """ - if endianness == "little": - dtype = Int16(endianness="big") - else: - dtype = Int16(endianness="little") - - serializer = BytesCodec(endian=endianness) - - msg = 
( - f"The endianness of the requested serializer ({serializer}) does not match the endianness of the dtype ({dtype.endianness}). " - "In this situation the serializer's endianness takes priority. " - "To avoid this warning, ensure the endianness of the serializer matches the endianness of the dtype." - ) - - with pytest.warns(UserWarning, match=re.escape(msg)): - _ = zarr.create_array( - store=store, - shape=(1,), - dtype=dtype, - zarr_format=3, - serializer=serializer, - ) async def test_scalar_array() -> None: From 269215eb3005fd653a173c96aa508d9a484df2fb Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 26 Mar 2025 10:45:53 +0100 Subject: [PATCH 062/130] remove Any types --- src/zarr/core/_info.py | 6 +++--- src/zarr/core/array_spec.py | 6 +++--- src/zarr/core/dtype/__init__.py | 6 +++--- src/zarr/core/dtype/_numpy.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index 3e605773bb..525b80c65f 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -2,14 +2,14 @@ import dataclasses import textwrap -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Literal if TYPE_CHECKING: import numcodecs.abc from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.core.common import ZarrFormat - from zarr.core.dtype.wrapper import ZDType + from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar @dataclasses.dataclass(kw_only=True) @@ -80,7 +80,7 @@ class ArrayInfo: _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat - _data_type: ZDType[Any, Any] + _data_type: ZDType[_BaseDType, _BaseScalar] _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] 
| None = None diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index f1eac930c4..e8e451944f 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -17,7 +17,7 @@ from zarr.core.buffer import BufferPrototype from zarr.core.common import ChunkCoords - from zarr.core.dtype.wrapper import ZDType + from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar class ArrayConfigParams(TypedDict): @@ -89,7 +89,7 @@ def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig: @dataclass(frozen=True) class ArraySpec: shape: ChunkCoords - dtype: ZDType[Any, Any] + dtype: ZDType[_BaseDType, _BaseScalar] fill_value: Any config: ArrayConfig prototype: BufferPrototype @@ -97,7 +97,7 @@ class ArraySpec: def __init__( self, shape: ChunkCoords, - dtype: ZDType[Any, Any], + dtype: ZDType[_BaseDType, _BaseScalar], fill_value: Any, config: ArrayConfig, prototype: BufferPrototype, diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 5483b21998..0aaf9ccf06 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, TypeAlias, get_args +from typing import TYPE_CHECKING, TypeAlias, get_args if TYPE_CHECKING: from zarr.core.common import ZarrFormat @@ -77,7 +77,7 @@ | DateTime64 ) -ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[Any, Any] | dict[str, JSON] +ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[_BaseDType, _BaseScalar] | dict[str, JSON] for dtype in get_args(DTYPE): data_type_registry.register(dtype._zarr_v3_name, dtype) @@ -114,7 +114,7 @@ def get_data_type_from_json( return data_type_registry.match_json(dtype, zarr_format=zarr_format) -def parse_data_type(dtype: ZDTypeLike, zarr_format: ZarrFormat) -> ZDType[Any, Any]: +def parse_data_type(dtype: ZDTypeLike, zarr_format: ZarrFormat) -> ZDType[_BaseDType, _BaseScalar]: """ Interpret the input as a ZDType instance. 
""" diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py index 7c803ce1f0..51be83b173 100644 --- a/src/zarr/core/dtype/_numpy.py +++ b/src/zarr/core/dtype/_numpy.py @@ -1232,7 +1232,7 @@ def check_dtype(cls, dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: from zarr.core.dtype import get_data_type_from_native_dtype - fields: list[tuple[str, ZDType[Any, Any]]] = [] + fields: list[tuple[str, ZDType[_BaseDType, _BaseScalar]]] = [] if dtype.fields is None: raise ValueError("numpy dtype has no fields") From 8af0ce420c4622fc76d2c0ab2a243994ff493dcb Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 26 Mar 2025 11:27:54 +0100 Subject: [PATCH 063/130] add docstring for wrapper module --- src/zarr/core/dtype/wrapper.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 74e7bf79e1..ba1b78f096 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -1,3 +1,25 @@ +""" +Wrapper for native array data types. + +The `ZDType` class is an abstract base class for wrapping native array data types, e.g. numpy dtypes. +It provides a common interface for working with data types in a way that is independent of the +underlying data type system. + +The wrapper class encapsulates a native data type. Instances of the class can be created from a +native data type instance, and a native data type instance can be created from an instance of the +wrapper class. + +The wrapper class is responsible for: +- Reversibly serializing a native data type to Zarr V2 or Zarr V3 metadata. + This ensures that the data type can be properly stored and retrieved from array metadata. +- Reversibly serializing scalar values to Zarr V2 or Zarr V3 metadata. This is important for + storing a fill value for an array in a manner that is valid for the data type. 
+ +To add support for a new data type in Zarr, you should subclass the wrapper class and adapt its methods +to support your native data type. The wrapper class must be added to a data type registry +(defined elsewhere) before ``create_array`` can properly handle the new data type. +""" + from __future__ import annotations from abc import ABC, abstractmethod @@ -17,9 +39,10 @@ # This is the bound for the dtypes that we support. If we support non-numpy dtypes, # then this bound will need to be widened. _BaseDType = np.dtype[np.generic] +# These two type parameters are covariant because we want +# x : ZDType[BaseDType, BaseScalar] = ZDType[SubDType, SubScalar] +# to type check TScalar_co = TypeVar("TScalar_co", bound=_BaseScalar, covariant=True) -# TODO: figure out an interface or protocol that non-numpy dtypes can use -# These two type parameters are covariant because we want isinstance(ZDType[Subclass](), ZDType[BaseDType]) to be True TDType_co = TypeVar("TDType_co", bound=_BaseDType, covariant=True) From df60d057f3ebdadac7ce29457a0eabbebc15d2c2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 26 Mar 2025 15:23:08 +0100 Subject: [PATCH 064/130] simplify config and docs --- docs/user-guide/arrays.rst | 10 +++---- docs/user-guide/config.rst | 59 ++++++++++++++++---------------------- src/zarr/core/array.py | 41 ++++++-------------------- src/zarr/core/config.py | 44 ++++++++++++++++++++++------ 4 files changed, 73 insertions(+), 81 deletions(-) diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst index f55dd00c80..a354298a16 100644 --- a/docs/user-guide/arrays.rst +++ b/docs/user-guide/arrays.rst @@ -182,7 +182,7 @@ which can be used to print useful diagnostics, e.g.:: >>> z.info Type : Array Zarr format : 3 - Data type : int32 + Data type : Int32(endianness='little') Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : C @@ -199,7 +199,7 @@ prints additional diagnostics, e.g.:: >>> z.info_complete() Type : Array Zarr format : 3 - Data 
type : int32 + Data type : Int32(endianness='little') Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : C @@ -246,7 +246,7 @@ built-in delta filter:: The default compressor can be changed by setting the value of the using Zarr's :ref:`user-guide-config`, e.g.:: - >>> with zarr.config.set({'array.v2_default_compressor.numeric': {'id': 'blosc'}}): + >>> with zarr.config.set({'array.v2_default_compressor.default': {'id': 'blosc'}}): ... z = zarr.create_array(store={}, shape=(100000000,), chunks=(1000000,), dtype='int32', zarr_format=2) >>> z.filters () @@ -286,7 +286,7 @@ Here is an example using a delta filter with the Blosc compressor:: >>> z.info Type : Array Zarr format : 3 - Data type : int32 + Data type : Int32(endianness='little') Shape : (10000, 10000) Chunk shape : (1000, 1000) Order : C @@ -600,7 +600,7 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za >>> a.info_complete() Type : Array Zarr format : 3 - Data type : uint8 + Data type : UInt8() Shape : (10000, 10000) Shard shape : (1000, 1000) Chunk shape : (100, 100) diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst index 91ffe50b91..4479e30619 100644 --- a/docs/user-guide/config.rst +++ b/docs/user-guide/config.rst @@ -43,39 +43,30 @@ This is the current default configuration:: >>> zarr.config.pprint() {'array': {'order': 'C', - 'v2_default_compressor': {'bytes': {'checksum': False, - 'id': 'zstd', - 'level': 0}, - 'numeric': {'checksum': False, - 'id': 'zstd', - 'level': 0}, - 'string': {'checksum': False, + 'v2_default_compressor': {'default': {'checksum': False, 'id': 'zstd', - 'level': 0}}, - 'v2_default_filters': {'bytes': [{'id': 'vlen-bytes'}], - 'numeric': None, - 'raw': None, - 'string': [{'id': 'vlen-utf8'}]}, - 'v3_default_compressors': {'bytes': [{'configuration': {'checksum': False, - 'level': 0}, - 'name': 'zstd'}], - 'numeric': [{'configuration': {'checksum': False, + 'level': 0}, + 'variable-length-string': {'checksum': False, + 
'id': 'zstd', + 'level': 0}}, + 'v2_default_filters': {'default': None, + 'variable-length-string': [{'id': 'vlen-utf8'}]}, + 'v3_default_compressors': {'default': [{'configuration': {'checksum': False, 'level': 0}, 'name': 'zstd'}], - 'string': [{'configuration': {'checksum': False, - 'level': 0}, - 'name': 'zstd'}]}, - 'v3_default_filters': {'bytes': [], 'numeric': [], 'string': []}, - 'v3_default_serializer': {'bytes': {'name': 'vlen-bytes'}, - 'numeric': {'configuration': {'endian': 'little'}, - 'name': 'bytes'}, - 'string': {'name': 'vlen-utf8'}}, - 'write_empty_chunks': False}, - 'async': {'concurrency': 10, 'timeout': None}, - 'buffer': 'zarr.core.buffer.cpu.Buffer', - 'codec_pipeline': {'batch_size': 1, - 'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'}, - 'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec', + 'variable-length-string': [{'configuration': {'checksum': False, + 'level': 0}, + 'name': 'zstd'}]}, + 'v3_default_filters': {'default': [], 'variable-length-string': []}, + 'v3_default_serializer': {'default': {'configuration': {'endian': 'little'}, + 'name': 'bytes'}, + 'variable-length-string': {'name': 'vlen-utf8'}}, + 'write_empty_chunks': False}, + 'async': {'concurrency': 10, 'timeout': None}, + 'buffer': 'zarr.core.buffer.cpu.Buffer', + 'codec_pipeline': {'batch_size': 1, + 'path': 'zarr.core.codec_pipeline.BatchedCodecPipeline'}, + 'codecs': {'blosc': 'zarr.codecs.blosc.BloscCodec', 'bytes': 'zarr.codecs.bytes.BytesCodec', 'crc32c': 'zarr.codecs.crc32c_.Crc32cCodec', 'endian': 'zarr.codecs.bytes.BytesCodec', @@ -85,7 +76,7 @@ This is the current default configuration:: 'vlen-bytes': 'zarr.codecs.vlen_utf8.VLenBytesCodec', 'vlen-utf8': 'zarr.codecs.vlen_utf8.VLenUTF8Codec', 'zstd': 'zarr.codecs.zstd.ZstdCodec'}, - 'default_zarr_format': 3, - 'json_indent': 2, - 'ndbuffer': 'zarr.core.buffer.cpu.NDBuffer', - 'threading': {'max_workers': None}} + 'default_zarr_format': 3, + 'json_indent': 2, + 'ndbuffer': 'zarr.core.buffer.cpu.NDBuffer', 
+ 'threading': {'max_workers': None}} diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 2a63e07f27..8b1fb2d236 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -63,6 +63,7 @@ parse_shapelike, product, ) +from zarr.core.config import categorize_data_type from zarr.core.config import config as zarr_config from zarr.core.dtype import ( ZDType, @@ -4224,27 +4225,12 @@ def _get_default_chunk_encoding_v3( """ Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype. """ - # the config will not allow keys to have "." characters in them - # so we will access the config by transforming "." to "__" - dtype_name_conf = dtype._zarr_v3_name.replace(".", "__") + dtype_category = categorize_data_type(dtype) - # TODO: find a registry-style solution for this that isn't bloated - # We need to associate specific dtypes with specific encoding schemes - - if dtype_name_conf in zarr_config.get("array.v3_default_filters"): - filters = zarr_config.get(f"array.v3_default_filters.{dtype_name_conf}") - else: - filters = zarr_config.get("array.v3_default_filters.default") - - if dtype_name_conf in zarr_config.get("array.v3_default_compressors"): - compressors = zarr_config.get(f"array.v3_default_compressors.{dtype_name_conf}") - else: - compressors = zarr_config.get("array.v3_default_compressors.default") - if dtype_name_conf in zarr_config.get("array.v3_default_serializer"): - serializer = zarr_config.get(f"array.v3_default_serializer.{dtype_name_conf}") - else: - serializer = zarr_config.get("array.v3_default_serializer.default") + filters = zarr_config.get("array.v3_default_filters").get(dtype_category) + compressors = zarr_config.get("array.v3_default_compressors").get(dtype_category) + serializer = zarr_config.get("array.v3_default_serializer").get(dtype_category) return ( tuple(_parse_array_array_codec(f) for f in filters), @@ -4259,20 +4245,9 @@ def _get_default_chunk_encoding_v2( """ Get the default chunk encoding for 
Zarr format 2 arrays, given a dtype """ - # the config will not allow keys to have "." characters in them - # so we will access the config by transforming "." to "__" - dtype_name_conf = dtype._zarr_v3_name.replace(".", "__") - - if dtype_name_conf in zarr_config.get("array.v2_default_filters"): - filters = zarr_config.get(f"array.v2_default_filters.{dtype_name_conf}") - else: - filters = zarr_config.get("array.v2_default_filters.default") - - if dtype_name_conf in zarr_config.get("array.v2_default_compressor"): - compressor = zarr_config.get(f"array.v2_default_compressor.{dtype_name_conf}") - else: - compressor = zarr_config.get("array.v2_default_compressor.default") - + dtype_category = categorize_data_type(dtype) + filters = zarr_config.get("array.v2_default_filters").get(dtype_category) + compressor = zarr_config.get("array.v2_default_compressor").get(dtype_category) if filters is not None: filters = tuple(numcodecs.get_codec(f) for f in filters) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 8f87910daa..7c61c2e6ac 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -36,11 +36,21 @@ if TYPE_CHECKING: from donfig.config_obj import ConfigSet + from zarr.core.dtype.wrapper import ZDType + class BadConfigError(ValueError): _msg = "bad Config: %r" +# These values are used for rough categorization of data types +# we use this for choosing a default encoding scheme based on the data type. Specifically, +# these categories are keys in a configuration dictionary. +# it is not a part of the ZDType class because these categories are more of an implementation detail +# of our config system rather than a useful attribute of any particular data type. 
+DTypeCategory = Literal["variable-length-string", "default"] + + class Config(DConfig): # type: ignore[misc] """The Config will collect configuration from config files and environment variables @@ -77,24 +87,26 @@ def enable_gpu(self) -> ConfigSet: "array": { "order": "C", "write_empty_chunks": False, - "v2_default_compressor": {"default": {"id": "zstd", "level": 0, "checksum": False}}, + "v2_default_compressor": { + "default": {"id": "zstd", "level": 0, "checksum": False}, + "variable-length-string": {"id": "zstd", "level": 0, "checksum": False}, + }, "v2_default_filters": { "default": None, - "numpy__variable_length_utf8": [{"id": "vlen-utf8"}], - "numpy__fixed_length_ucs4": [{"id": "vlen-utf8"}], - "numpy__fixed_length_ascii": [{"id": "vlen-bytes"}], + "variable-length-string": [{"id": "vlen-utf8"}], }, - "v3_default_filters": {"default": []}, + "v3_default_filters": {"default": [], "variable-length-string": []}, "v3_default_serializer": { "default": {"name": "bytes", "configuration": {"endian": "little"}}, - "numpy__variable_length_utf8": {"name": "vlen-utf8"}, - "numpy__fixed_length_ucs4": {"name": "vlen-utf8"}, - "r*": {"name": "vlen-bytes"}, + "variable-length-string": {"name": "vlen-utf8"}, }, "v3_default_compressors": { "default": [ {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ] + ], + "variable-length-string": [ + {"name": "zstd", "configuration": {"level": 0, "checksum": False}} + ], }, }, "async": {"concurrency": 10, "timeout": None}, @@ -128,3 +140,17 @@ def parse_indexing_order(data: Any) -> Literal["C", "F"]: return cast(Literal["C", "F"], data) msg = f"Expected one of ('C', 'F'), got {data} instead." raise ValueError(msg) + + +def categorize_data_type(dtype: ZDType[Any, Any]) -> DTypeCategory: + """ + Classify a ZDType. The return value is a string which belongs to the type ``DTypeKind``. 
+ + This is used by the config system to determine how to encode arrays with the associated data type + when the user has not specified a particular serialization scheme. + """ + from zarr.core.dtype._numpy import VariableLengthString + + if isinstance(dtype, VariableLengthString): + return "variable-length-string" + return "default" From 7f54bbfe2308f8910acae88b8affeb1f0bf74557 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 26 Mar 2025 17:32:03 +0100 Subject: [PATCH 065/130] update config test --- tests/test_config.py | 112 +++++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 52 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 8d6e0a53ed..a2a84e7e7e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -24,7 +24,7 @@ from zarr.core.buffer import NDBuffer from zarr.core.codec_pipeline import BatchedCodecPipeline from zarr.core.config import BadConfigError, config -from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.dtype._numpy import Int8, VariableLengthString from zarr.core.indexing import SelectorTuple from zarr.registry import ( fully_qualified_name, @@ -48,55 +48,60 @@ def test_config_defaults_set() -> None: # regression test for available defaults - assert config.defaults == [ - { - "default_zarr_format": 3, - "array": { - "order": "C", - "write_empty_chunks": False, - "v2_default_compressor": {"default": {"id": "zstd", "level": 0, "checksum": False}}, - "v2_default_filters": { - "default": None, - "numpy__variable_length_utf8": [{"id": "vlen-utf8"}], - "numpy__fixed_length_ucs4": [{"id": "vlen-utf8"}], - "numpy__fixed_length_ascii": [{"id": "vlen-bytes"}], + assert ( + config.defaults + == [ + { + "default_zarr_format": 3, + "array": { + "order": "C", + "write_empty_chunks": False, + "v2_default_compressor": { + "default": {"id": "zstd", "level": 0, "checksum": False}, + "variable-length-string": {"id": "zstd", "level": 0, "checksum": False}, + }, + 
"v2_default_filters": { + "default": None, + "variable-length-string": [{"id": "vlen-utf8"}], + }, + "v3_default_filters": {"default": [], "variable-length-string": []}, + "v3_default_serializer": { + "default": {"name": "bytes", "configuration": {"endian": "little"}}, + "variable-length-string": {"name": "vlen-utf8"}, + }, + "v3_default_compressors": { + "default": [ + {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, + ], + "variable-length-string": [ + {"name": "zstd", "configuration": {"level": 0, "checksum": False}} + ], + }, }, - "v3_default_filters": {"default": []}, - "v3_default_serializer": { - "default": {"name": "bytes", "configuration": {"endian": "little"}}, - "numpy__variable_length_utf8": {"name": "vlen-utf8"}, - "numpy__fixed_length_ucs4": {"name": "vlen-utf8"}, - "r*": {"name": "vlen-bytes"}, + "async": {"concurrency": 10, "timeout": None}, + "threading": {"max_workers": None}, + "json_indent": 2, + "codec_pipeline": { + "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", + "batch_size": 1, }, - "v3_default_compressors": { - "default": [ - {"name": "zstd", "configuration": {"level": 0, "checksum": False}}, - ] + "codecs": { + "blosc": "zarr.codecs.blosc.BloscCodec", + "gzip": "zarr.codecs.gzip.GzipCodec", + "zstd": "zarr.codecs.zstd.ZstdCodec", + "bytes": "zarr.codecs.bytes.BytesCodec", + "endian": "zarr.codecs.bytes.BytesCodec", # compatibility with earlier versions of ZEP1 + "crc32c": "zarr.codecs.crc32c_.Crc32cCodec", + "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", + "transpose": "zarr.codecs.transpose.TransposeCodec", + "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", + "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", }, - }, - "async": {"concurrency": 10, "timeout": None}, - "threading": {"max_workers": None}, - "json_indent": 2, - "codec_pipeline": { - "path": "zarr.core.codec_pipeline.BatchedCodecPipeline", - "batch_size": 1, - }, - "buffer": "zarr.core.buffer.cpu.Buffer", - "ndbuffer": 
"zarr.core.buffer.cpu.NDBuffer", - "codecs": { - "blosc": "zarr.codecs.blosc.BloscCodec", - "gzip": "zarr.codecs.gzip.GzipCodec", - "zstd": "zarr.codecs.zstd.ZstdCodec", - "bytes": "zarr.codecs.bytes.BytesCodec", - "endian": "zarr.codecs.bytes.BytesCodec", - "crc32c": "zarr.codecs.crc32c_.Crc32cCodec", - "sharding_indexed": "zarr.codecs.sharding.ShardingCodec", - "transpose": "zarr.codecs.transpose.TransposeCodec", - "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec", - "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec", - }, - } - ] + "buffer": "zarr.core.buffer.cpu.Buffer", + "ndbuffer": "zarr.core.buffer.cpu.NDBuffer", + } + ] + ) assert config.get("array.order") == "C" assert config.get("async.concurrency") == 10 assert config.get("async.timeout") is None @@ -297,15 +302,18 @@ class NewCodec2(BytesCodec): get_codec_class("new_codec") -@pytest.mark.parametrize("dtype", ["int", "bytes", "str"]) -async def test_default_codecs(dtype: str) -> None: +@pytest.mark.parametrize("dtype_category", ["variable-length-string", "default"]) +async def test_default_codecs(dtype_category: str) -> None: """ Test that the default compressors are sensitive to the current setting of the config. 
""" - zdtype = get_data_type_from_native_dtype(dtype) + if dtype_category == "variable-length-string": + zdtype = VariableLengthString() + else: + zdtype = Int8() expected_compressors = (GzipCodec(),) new_conf = { - f"array.v3_default_compressors.{zdtype._zarr_v3_name.replace('.', '__')}": [ + f"array.v3_default_compressors.{dtype_category}": [ c.to_dict() for c in expected_compressors ] } @@ -313,7 +321,7 @@ async def test_default_codecs(dtype: str) -> None: arr = await create_array( shape=(100,), chunks=(100,), - dtype=dtype, + dtype=zdtype, zarr_format=3, store=MemoryStore(), ) From be83f03058da71718340d35aa337d928d130a724 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 26 Mar 2025 17:39:55 +0100 Subject: [PATCH 066/130] fix S dtype test for v2 --- tests/test_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_v2.py b/tests/test_v2.py index f3dec247b7..293359d910 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -87,7 +87,7 @@ async def test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_js "compressor": None, "dtype": expected_dtype, "fill_value": fill_value_json, - "filters": [{"id": "vlen-bytes"}] if dtype == "|S" else None, + "filters": None, "order": "C", "shape": [3], "zarr_format": 2, From a210f9fda46e09f0aa0b08e67ca18a87f24d7dfe Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 28 Apr 2025 16:30:02 +0200 Subject: [PATCH 067/130] fully remove v3jsonencoder --- src/zarr/api/asynchronous.py | 1 - src/zarr/core/group.py | 10 +++-- src/zarr/core/metadata/v2.py | 8 ++-- src/zarr/core/metadata/v3.py | 51 +++++------------------- tests/test_metadata/test_consolidated.py | 10 +---- tests/test_properties.py | 33 ++++++++++++--- 6 files changed, 50 insertions(+), 63 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index bbc5f99c31..50cddaa1d6 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -223,7 +223,6 @@ async def 
consolidate_metadata( group, metadata=metadata, ) - await group._save_metadata() return group diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 41c3e33baf..6c8df6aacd 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -49,7 +49,6 @@ ) from zarr.core.config import config from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata -from zarr.core.metadata.v3 import V3JsonEncoder from zarr.core.sync import SyncMixin, sync from zarr.errors import ContainsArrayError, ContainsGroupError, MetadataValidationError from zarr.storage import StoreLike, StorePath @@ -334,7 +333,7 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: if self.zarr_format == 3: return { ZARR_JSON: prototype.buffer.from_bytes( - json.dumps(self.to_dict(), cls=V3JsonEncoder).encode() + json.dumps(self.to_dict(), indent=json_indent, allow_nan=False).encode() ) } else: @@ -343,7 +342,7 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: json.dumps({"zarr_format": self.zarr_format}, indent=json_indent).encode() ), ZATTRS_JSON: prototype.buffer.from_bytes( - json.dumps(self.attributes, indent=json_indent).encode() + json.dumps(self.attributes, indent=json_indent, allow_nan=False).encode() ), } if self.consolidated_metadata: @@ -354,6 +353,8 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: consolidated_metadata = self.consolidated_metadata.to_dict()["metadata"] assert isinstance(consolidated_metadata, dict) for k, v in consolidated_metadata.items(): + attrs = v.pop("attributes", {}) + d[f"{k}/{ZATTRS_JSON}"] = attrs if "shape" in v: # it's an array d[f"{k}/{ZARRAY_JSON}"] = v @@ -369,7 +370,7 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: items[ZMETADATA_V2_JSON] = prototype.buffer.from_bytes( json.dumps( - {"metadata": d, "zarr_consolidated_format": 1}, cls=V3JsonEncoder + {"metadata": d, "zarr_consolidated_format": 1}, allow_nan=False ).encode() ) @@ -608,6 
+609,7 @@ def _from_bytes_v2( consolidated_metadata[path].update(v) else: raise ValueError(f"Invalid file type '{kind}' at path '{path}") + group_metadata["consolidated_metadata"] = { "metadata": dict(consolidated_metadata), "kind": "inline", diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 13e775d0b0..23824520f7 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -3,7 +3,7 @@ import base64 import warnings from collections.abc import Iterable, Sequence -from typing import TYPE_CHECKING, TypedDict +from typing import TYPE_CHECKING, Any, TypedDict import numcodecs.abc @@ -109,7 +109,7 @@ def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: json_indent = config.get("json_indent") return { ZARRAY_JSON: prototype.buffer.from_bytes( - json.dumps(zarray_dict, indent=json_indent).encode() + json.dumps(zarray_dict, indent=json_indent, allow_nan=False).encode() ), ZATTRS_JSON: prototype.buffer.from_bytes( json.dumps(zattrs_dict, indent=json_indent, allow_nan=False).encode() @@ -178,10 +178,12 @@ def to_dict(self) -> dict[str, JSON]: new_filters.append(f) zarray_dict["filters"] = new_filters + # serialize the fill value after dtype-specific JSON encoding if self.fill_value is not None: fill_value = self.dtype.to_json_value(self.fill_value, zarr_format=2) zarray_dict["fill_value"] = fill_value + # serialize the dtype after fill value-specific JSON encoding zarray_dict["dtype"] = self.dtype.to_json(zarr_format=2) return zarray_dict @@ -289,7 +291,7 @@ def _parse_structured_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any: raise ValueError(f"Fill_value {fill_value} is not valid for dtype {dtype}.") from e -def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any: +def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any: """ Parse a potential fill value into a value that is compatible with the provided dtype. 
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index ead05b5e44..559298c13f 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -11,7 +11,6 @@ ) if TYPE_CHECKING: - from collections.abc import Callable from typing import Self from zarr.core.buffer import Buffer, BufferPrototype @@ -25,8 +24,6 @@ from dataclasses import dataclass, field, replace from typing import Any, Literal -import numpy as np - from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec from zarr.core.array_spec import ArrayConfig, ArraySpec from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid @@ -43,15 +40,6 @@ from zarr.errors import MetadataValidationError, NodeTypeValidationError from zarr.registry import get_codec_class -DEFAULT_DTYPE = "float64" - -# Keep in sync with _replace_special_floats -SPECIAL_FLOATS_ENCODED = { - "Infinity": np.inf, - "-Infinity": -np.inf, - "NaN": np.nan, -} - def parse_zarr_format(data: object) -> Literal[3]: if data == 3: @@ -141,33 +129,6 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]: ) -class V3JsonEncoder(json.JSONEncoder): - def __init__( - self, - *, - skipkeys: bool = False, - ensure_ascii: bool = True, - check_circular: bool = True, - allow_nan: bool = True, - sort_keys: bool = False, - indent: int | None = None, - separators: tuple[str, str] | None = None, - default: Callable[[object], object] | None = None, - ) -> None: - if indent is None: - indent = config.get("json_indent") - super().__init__( - skipkeys=skipkeys, - ensure_ascii=ensure_ascii, - check_circular=check_circular, - allow_nan=allow_nan, - sort_keys=sort_keys, - indent=indent, - separators=separators, - default=default, - ) - - class ArrayV3MetadataDict(TypedDict): """ A typed dictionary model for zarr v3 metadata. 
@@ -259,6 +220,10 @@ def _validate_metadata(self) -> None: def ndim(self) -> int: return len(self.shape) + @property + def dtype(self) -> ZDType[_BaseDType, _BaseScalar]: + return self.data_type + @property def chunks(self) -> ChunkCoords: if isinstance(self.chunk_grid, RegularChunkGrid): @@ -306,9 +271,13 @@ def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: return self.chunk_key_encoding.encode_chunk_key(chunk_coords) def to_buffer_dict(self, prototype: BufferPrototype) -> dict[str, Buffer]: + json_indent = config.get("json_indent") d = self.to_dict() - # d = _replace_special_floats(self.to_dict()) - return {ZARR_JSON: prototype.buffer.from_bytes(json.dumps(d, cls=V3JsonEncoder).encode())} + return { + ZARR_JSON: prototype.buffer.from_bytes( + json.dumps(d, allow_nan=False, indent=json_indent).encode() + ) + } @classmethod def from_dict(cls, data: dict[str, JSON]) -> Self: diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index 71720af58b..b2244c5047 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -581,7 +581,6 @@ async def test_consolidated_metadata_encodes_special_chars( memory_store: Store, zarr_format: ZarrFormat, fill_value: float ): root = await group(store=memory_store, zarr_format=zarr_format) - _child = await root.create_group("child", attributes={"test": fill_value}) _time = await root.create_array("time", shape=(12,), dtype=np.float64, fill_value=fill_value) await zarr.api.asynchronous.consolidate_metadata(memory_store) @@ -595,16 +594,9 @@ async def test_consolidated_metadata_encodes_special_chars( "consolidated_metadata" ]["metadata"] - if np.isnan(fill_value): - expected_fill_value = "NaN" - elif np.isneginf(fill_value): - expected_fill_value = "-Infinity" - elif np.isinf(fill_value): - expected_fill_value = "Infinity" + expected_fill_value = _time._zdtype.to_json_value(fill_value, zarr_format=2) if zarr_format == 2: - assert 
root_metadata["child/.zattrs"]["test"] == expected_fill_value assert root_metadata["time/.zarray"]["fill_value"] == expected_fill_value elif zarr_format == 3: - assert root_metadata["child"]["attributes"]["test"] == expected_fill_value assert root_metadata["time"]["fill_value"] == expected_fill_value diff --git a/tests/test_properties.py b/tests/test_properties.py index d48dfe2fef..7c741ec873 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -239,6 +239,29 @@ def test_roundtrip_array_metadata_from_json(data: st.DataObject, zarr_format: in # assert_array_equal(nparray, zarray[:]) +def serialized_complex_float_is_valid( + serialized: tuple[numbers.Real | str, numbers.Real | str], +) -> bool: + """ + Validate that the serialized representation of a complex float conforms to the spec. + + The specification requires that a serialized complex float must be either: + - A JSON number, or + - One of the strings "NaN", "Infinity", or "-Infinity". + + Args: + serialized: The value produced by JSON serialization for a complex floating point number. + + Returns: + bool: True if the serialized value is valid according to the spec, False otherwise. + """ + return ( + isinstance(serialized, tuple) + and len(serialized) == 2 + and all(serialized_float_is_valid(x) for x in serialized) + ) + + def serialized_float_is_valid(serialized: numbers.Real | str) -> bool: """ Validate that the serialized representation of a float conforms to the spec. @@ -294,11 +317,11 @@ def test_array_metadata_meets_spec(meta: ArrayV2Metadata | ArrayV3Metadata) -> N assert asdict_dict["zarr_format"] == 3 # version-agnostic validations - if meta.dtype.kind == "f": + dtype_native = meta.dtype.to_dtype() + if dtype_native.kind == "f": assert serialized_float_is_valid(asdict_dict["fill_value"]) - elif meta.dtype.kind == "c": + elif dtype_native.kind == "c": # fill_value should be a two-element array [real, imag]. 
- assert serialized_float_is_valid(asdict_dict["fill_value"].real) - assert serialized_float_is_valid(asdict_dict["fill_value"].imag) - elif meta.dtype.kind == "M" and np.isnat(meta.fill_value): + assert serialized_complex_float_is_valid(asdict_dict["fill_value"]) + elif dtype_native.kind == "M" and np.isnat(meta.fill_value): assert asdict_dict["fill_value"] == "NaT" From 8fbd29a42f6529184508bec5f9ef2e08cf9cdd84 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 29 Apr 2025 15:06:11 +0200 Subject: [PATCH 068/130] refactor dtype module structure --- src/zarr/codecs/bytes.py | 2 +- src/zarr/codecs/sharding.py | 2 +- src/zarr/core/config.py | 4 +- src/zarr/core/dtype/__init__.py | 33 +- src/zarr/core/dtype/_numpy.py | 1397 --------------------- src/zarr/core/dtype/common.py | 513 +------- src/zarr/core/dtype/npy/__init__.py | 0 src/zarr/core/dtype/npy/bool.py | 114 ++ src/zarr/core/dtype/npy/common.py | 578 +++++++++ src/zarr/core/dtype/npy/complex.py | 155 +++ src/zarr/core/dtype/npy/float.py | 154 +++ src/zarr/core/dtype/npy/int.py | 318 +++++ src/zarr/core/dtype/npy/sized.py | 382 ++++++ src/zarr/core/dtype/npy/string.py | 134 ++ src/zarr/core/dtype/npy/time.py | 142 +++ src/zarr/core/metadata/v2.py | 2 +- src/zarr/core/metadata/v3.py | 1 + tests/conftest.py | 4 +- tests/package_with_entrypoint/__init__.py | 2 +- tests/test_array.py | 12 +- tests/test_config.py | 2 +- tests/test_dtype.py | 28 +- tests/test_info.py | 2 +- tests/test_metadata/test_v2.py | 3 +- tests/test_metadata/test_v3.py | 4 +- 25 files changed, 2031 insertions(+), 1957 deletions(-) delete mode 100644 src/zarr/core/dtype/_numpy.py create mode 100644 src/zarr/core/dtype/npy/__init__.py create mode 100644 src/zarr/core/dtype/npy/bool.py create mode 100644 src/zarr/core/dtype/npy/common.py create mode 100644 src/zarr/core/dtype/npy/complex.py create mode 100644 src/zarr/core/dtype/npy/float.py create mode 100644 src/zarr/core/dtype/npy/int.py create mode 100644 
src/zarr/core/dtype/npy/sized.py create mode 100644 src/zarr/core/dtype/npy/string.py create mode 100644 src/zarr/core/dtype/npy/time.py diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 80972096c2..6c28bfe543 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -10,7 +10,7 @@ from zarr.abc.codec import ArrayBytesCodec from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer from zarr.core.common import JSON, parse_enum, parse_named_configuration -from zarr.core.dtype._numpy import endianness_to_numpy_str +from zarr.core.dtype.npy.common import endianness_to_numpy_str from zarr.registry import register_codec if TYPE_CHECKING: diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index e8a23e20c4..12d709b599 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -43,7 +43,7 @@ parse_shapelike, product, ) -from zarr.core.dtype._numpy import UInt64 +from zarr.core.dtype.npy.int import UInt64 from zarr.core.indexing import ( BasicIndexer, SelectorTuple, diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 7c61c2e6ac..08674d9a66 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -144,12 +144,12 @@ def parse_indexing_order(data: Any) -> Literal["C", "F"]: def categorize_data_type(dtype: ZDType[Any, Any]) -> DTypeCategory: """ - Classify a ZDType. The return value is a string which belongs to the type ``DTypeKind``. + Classify a ZDType. The return value is a string which belongs to the type ``DTypeCategory``. This is used by the config system to determine how to encode arrays with the associated data type when the user has not specified a particular serialization scheme. 
""" - from zarr.core.dtype._numpy import VariableLengthString + from zarr.core.dtype import VariableLengthString if isinstance(dtype, VariableLengthString): return "variable-length-string" diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 0aaf9ccf06..63b593fd28 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -2,6 +2,18 @@ from typing import TYPE_CHECKING, TypeAlias, get_args +from zarr.core.dtype.npy.bool import Bool +from zarr.core.dtype.npy.complex import Complex64, Complex128 +from zarr.core.dtype.npy.float import Float16, Float32, Float64 +from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 +from zarr.core.dtype.npy.sized import ( + FixedLengthAscii, + FixedLengthBytes, + FixedLengthUnicode, + Structured, +) +from zarr.core.dtype.npy.time import DateTime64 + if TYPE_CHECKING: from zarr.core.common import ZarrFormat @@ -9,27 +21,8 @@ import numpy.typing as npt from zarr.core.common import JSON -from zarr.core.dtype._numpy import ( +from zarr.core.dtype.npy.string import ( _NUMPY_SUPPORTS_VLEN_STRING, - Bool, - Complex64, - Complex128, - DateTime64, - FixedLengthAscii, - FixedLengthBytes, - FixedLengthUnicode, - Float16, - Float32, - Float64, - Int8, - Int16, - Int32, - Int64, - Structured, - UInt8, - UInt16, - UInt32, - UInt64, VariableLengthString, ) from zarr.core.dtype.registry import DataTypeRegistry diff --git a/src/zarr/core/dtype/_numpy.py b/src/zarr/core/dtype/_numpy.py deleted file mode 100644 index 51be83b173..0000000000 --- a/src/zarr/core/dtype/_numpy.py +++ /dev/null @@ -1,1397 +0,0 @@ -from __future__ import annotations - -import base64 -import re -import sys -from collections.abc import Sequence -from dataclasses import dataclass -from typing import ( - TYPE_CHECKING, - Any, - ClassVar, - Literal, - Self, - SupportsComplex, - SupportsFloat, - SupportsIndex, - SupportsInt, - TypeGuard, - TypeVar, - cast, - get_args, -) - -import numpy 
as np - -from zarr.core.dtype.common import ( - DataTypeValidationError, - Endianness, - bytes_from_json, - bytes_to_json, - check_json_bool, - check_json_complex_float, - check_json_float, - check_json_int, - check_json_str, - complex_float_from_json, - complex_float_to_json, - datetime_from_json, - datetime_to_json, - float_from_json, - float_to_json, -) -from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar - -if TYPE_CHECKING: - from zarr.core.common import JSON, ZarrFormat - -EndiannessNumpy = Literal[">", "<", "|", "="] -IntLike = SupportsInt | SupportsIndex | bytes | str -FloatLike = SupportsIndex | SupportsFloat | bytes | str -ComplexLike = SupportsFloat | SupportsIndex | SupportsComplex | bytes | str | None - - -@dataclass(frozen=True) -class HasEndianness: - """ - This is a mix-in class for data types with an endianness attribute - """ - - endianness: Endianness | None = "little" - - -@dataclass(frozen=True) -class HasLength: - """ - This is a mix-in class for data types with a length attribute - """ - - length: int - - -@dataclass(frozen=True, kw_only=True, slots=True) -class Bool(ZDType[np.dtypes.BoolDType, np.bool_]): - """ - Wrapper for numpy boolean dtype. - - Attributes - ---------- - name : str - The name of the dtype. - dtype_cls : ClassVar[type[np.dtypes.BoolDType]] - The numpy dtype class. - """ - - _zarr_v3_name = "bool" - _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|b1",) - dtype_cls = np.dtypes.BoolDType - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - return cls() - - def to_dtype(self: Self) -> np.dtypes.BoolDType: - return self.dtype_cls() - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["bool", "|b1"]]: - """ - Check that the input is a valid JSON representation of a bool. 
- """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def to_json(self, zarr_format: ZarrFormat) -> str: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - return cls() - - def default_value(self) -> np.bool_: - """ - Get the default value for the boolean dtype. - - Returns - ------- - np.bool_ - The default value. - """ - return np.False_ - - def to_json_value(self, data: object, zarr_format: ZarrFormat) -> bool: - """ - Convert a scalar to a python bool. - - Parameters - ---------- - data : object - The value to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - bool - The JSON-serializable format. - """ - return bool(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: - """ - Read a JSON-serializable value as a numpy boolean scalar. - - Parameters - ---------- - data : JSON - The JSON-serializable value. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - np.bool_ - The numpy boolean scalar. - """ - if check_json_bool(data): - return self._cast_value_unsafe(data) - raise TypeError(f"Invalid type: {data}. 
Expected a boolean.") - - def check_value(self, data: object) -> bool: - # Anything can become a bool - return True - - def cast_value(self, value: object) -> np.bool_: - return self._cast_value_unsafe(value) - - def _cast_value_unsafe(self, value: object) -> np.bool_: - return np.bool_(value) - - -_NumpyIntDType = ( - np.dtypes.Int8DType - | np.dtypes.Int16DType - | np.dtypes.Int32DType - | np.dtypes.Int64DType - | np.dtypes.UInt8DType - | np.dtypes.UInt16DType - | np.dtypes.UInt32DType - | np.dtypes.UInt64DType -) -_NumpyIntScalar = ( - np.int8 | np.int16 | np.int32 | np.int64 | np.uint8 | np.uint16 | np.uint32 | np.uint64 -) -TIntDType_co = TypeVar("TIntDType_co", bound=_NumpyIntDType, covariant=True) -TIntScalar_co = TypeVar("TIntScalar_co", bound=_NumpyIntScalar, covariant=True) - - -@dataclass(frozen=True) -class BaseInt(ZDType[TIntDType_co, TIntScalar_co]): - # This attribute holds the possible zarr v2 JSON names for the data type - _zarr_v2_names: ClassVar[tuple[str, ...]] - - def to_json(self, zarr_format: ZarrFormat) -> str: - """ - Convert the wrapped data type to a JSON-serializable form. - - Parameters - ---------- - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - str - The JSON-serializable representation of the wrapped data type - """ - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - """ - Check that the input is a valid JSON representation of this data type. 
- """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def check_value(self, value: object) -> TypeGuard[IntLike]: - return isinstance(value, IntLike) - - def _cast_value_unsafe(self, value: object) -> TIntScalar_co: - if self.check_value(value): - return self.to_dtype().type(value) # type: ignore[return-value] - raise TypeError(f"Invalid type: {value}. Expected a value castable to an integer.") - - def default_value(self) -> TIntScalar_co: - """ - Get the default value, which is 0 cast to this dtype - - Returns - ------- - Int scalar - The default value. - """ - return self._cast_value_unsafe(0) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TIntScalar_co: - """ - Read a JSON-serializable value as a numpy int scalar. - - Parameters - ---------- - data : JSON - The JSON-serializable value. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - TScalar_co - The numpy scalar. - """ - if check_json_int(data): - return self._cast_value_unsafe(data) - raise TypeError(f"Invalid type: {data}. Expected an integer.") - - def to_json_value(self, data: object, zarr_format: ZarrFormat) -> int: - """ - Convert an object to JSON-serializable scalar. - - Parameters - ---------- - data : _BaseScalar - The value to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - int - The JSON-serializable form of the scalar. 
- """ - return int(self.cast_value(data)) - - -@dataclass(frozen=True, kw_only=True) -class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): - dtype_cls = np.dtypes.Int8DType - _zarr_v3_name = "int8" - _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|i1",) - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - return cls() - - def to_dtype(self: Self) -> np.dtypes.Int8DType: - return self.dtype_cls() - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - return cls() - - -@dataclass(frozen=True, kw_only=True) -class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): - dtype_cls = np.dtypes.UInt8DType - _zarr_v3_name = "uint8" - _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|u1",) - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - return cls() - - def to_dtype(self: Self) -> np.dtypes.UInt8DType: - return self.dtype_cls() - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - return cls() - - -@dataclass(frozen=True, kw_only=True) -class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): - dtype_cls = np.dtypes.Int16DType - _zarr_v3_name = "int16" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i2", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> np.dtypes.Int16DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - # This ensures that we get the endianness correct without annoying string parsing - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - -@dataclass(frozen=True, kw_only=True) -class 
UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): - dtype_cls = np.dtypes.UInt16DType - _zarr_v3_name = "uint16" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u2", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> np.dtypes.UInt16DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - -@dataclass(frozen=True, kw_only=True) -class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): - dtype_cls = np.dtypes.Int32DType - _zarr_v3_name = "int32" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", " Self: - # We override the base implementation to address a windows-specific, pre-numpy 2 issue where - # ``np.dtype('i')`` is an instance of ``np.dtypes.IntDType`` that acts like `int32` instead of ``np.dtype('int32')`` - # In this case, ``type(np.dtype('i')) == np.dtypes.Int32DType`` will evaluate to ``True``, - # despite the two classes being different. 
Thus we will create an instance of `cls` with the - # latter dtype, after pulling in the byte order of the input - if dtype == np.dtypes.Int32DType(): - return cls._from_dtype_unsafe(np.dtypes.Int32DType().newbyteorder(dtype.byteorder)) - else: - return super().from_dtype(dtype) - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> np.dtypes.Int32DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - -@dataclass(frozen=True, kw_only=True) -class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): - dtype_cls = np.dtypes.UInt32DType - _zarr_v3_name = "uint32" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u4", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> np.dtypes.UInt32DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - -@dataclass(frozen=True, kw_only=True) -class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): - dtype_cls = np.dtypes.Int64DType - _zarr_v3_name = "int64" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i8", " Self: - 
byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> np.dtypes.Int64DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - -@dataclass(frozen=True, kw_only=True) -class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): - dtype_cls = np.dtypes.UInt64DType - _zarr_v3_name = "uint64" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u8", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> np.dtypes.UInt64DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - -TFloatDType_co = TypeVar( - "TFloatDType_co", - bound=np.dtypes.Float16DType | np.dtypes.Float32DType | np.dtypes.Float64DType, - covariant=True, -) -TFloatScalar_co = TypeVar( - "TFloatScalar_co", bound=np.float16 | np.float32 | np.float64, covariant=True -) - - -@dataclass(frozen=True) -class BaseFloat(ZDType[TFloatDType_co, TFloatScalar_co], HasEndianness): - # This attribute holds the possible zarr v2 JSON names for the data type - _zarr_v2_names: ClassVar[tuple[str, ...]] - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - byte_order = 
cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> TFloatDType_co: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] - - def to_json(self, zarr_format: ZarrFormat) -> str: - """ - Convert the wrapped data type to a JSON-serializable form. - - Parameters - ---------- - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - str - The JSON-serializable representation of the wrapped data type - """ - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - """ - Check that the input is a valid JSON representation of this data type. - """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def check_value(self, value: object) -> TypeGuard[FloatLike]: - return isinstance(value, FloatLike) - - def _cast_value_unsafe(self, value: object) -> TFloatScalar_co: - if self.check_value(value): - return self.to_dtype().type(value) # type: ignore[return-value] - raise TypeError(f"Invalid type: {value}. Expected a value castable to a float.") - - def default_value(self) -> TFloatScalar_co: - """ - Get the default value, which is 0 cast to this dtype - - Returns - ------- - Int scalar - The default value. 
- """ - return self._cast_value_unsafe(0) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TFloatScalar_co: - """ - Read a JSON-serializable value as a numpy float. - - Parameters - ---------- - data : JSON - The JSON-serializable value. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - TScalar_co - The numpy float. - """ - if check_json_float(data, zarr_format=zarr_format): - return self._cast_value_unsafe(float_from_json(data, zarr_format=zarr_format)) - raise TypeError( - f"Invalid type: {data}. Expected a float or a special string encoding of a float." - ) - - def to_json_value(self, data: object, zarr_format: ZarrFormat) -> float | str: - """ - Convert an object to a JSON-serializable float. - - Parameters - ---------- - data : _BaseScalar - The value to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - JSON - The JSON-serializable form of the float, which is potentially a number or a string. - See the zarr specifications for details on the JSON encoding for floats. - """ - return float_to_json(self._cast_value_unsafe(data), zarr_format=zarr_format) - - -@dataclass(frozen=True, kw_only=True) -class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): - dtype_cls = np.dtypes.Float16DType - _zarr_v3_name = "float16" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f2", "f4", "f8", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) - - def to_dtype(self) -> TComplexDType_co: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] - - def to_json(self, zarr_format: ZarrFormat) -> str: - """ - Convert the wrapped data type to a JSON-serializable form. - - Parameters - ---------- - zarr_format : ZarrFormat - The zarr format version. 
- - Returns - ------- - str - The JSON-serializable representation of the wrapped data type - """ - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - """ - Check that the input is a valid JSON representation of this data type. - """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def check_value(self, value: object) -> bool: - return isinstance(value, ComplexLike) - - def _cast_value_unsafe(self, value: object) -> TComplexScalar_co: - if self.check_value(value): - return self.to_dtype().type(value) # type: ignore[arg-type, return-value] - raise TypeError(f"Invalid type: {value}. Expected a value castable to a complex scalar.") - - def default_value(self) -> TComplexScalar_co: - """ - Get the default value, which is 0 cast to this dtype - - Returns - ------- - Int scalar - The default value. - """ - return self._cast_value_unsafe(0) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TComplexScalar_co: - """ - Read a JSON-serializable value as a numpy float. - - Parameters - ---------- - data : JSON - The JSON-serializable value. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - TScalar_co - The numpy float. 
- """ - if check_json_complex_float(data, zarr_format=zarr_format): - return self._cast_value_unsafe(complex_float_from_json(data, zarr_format=zarr_format)) - raise TypeError( - f"Invalid type: {data}. Expected a float or a special string encoding of a float." - ) - - def to_json_value(self, data: object, zarr_format: ZarrFormat) -> JSON: - """ - Convert an object to a JSON-serializable float. - - Parameters - ---------- - data : _BaseScalar - The value to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - JSON - The JSON-serializable form of the complex number, which is a list of two floats, - each of which is encoding according to a zarr-format-specific encoding. - """ - return complex_float_to_json(self.cast_value(data), zarr_format=zarr_format) - - -@dataclass(frozen=True, kw_only=True) -class Complex64(BaseComplex[np.dtypes.Complex64DType, np.complex64]): - dtype_cls = np.dtypes.Complex64DType - _zarr_v3_name = "complex64" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c8", "c16", " Self: - return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) - - def to_dtype(self) -> np.dtypes.BytesDType[int]: - return self.dtype_cls(self.length) - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - """ - Check that the input is a valid JSON representation of a numpy S dtype. 
- """ - if zarr_format == 2: - # match |S1, |S2, etc - return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None - elif zarr_format == 3: - return ( - isinstance(data, dict) - and "name" in data - and data["name"] == cls._zarr_v3_name - and "configuration" in data - and isinstance(data["configuration"], dict) - and "length_bits" in data["configuration"] - and isinstance(data["configuration"]["length_bits"], int) - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return { - "name": self._zarr_v3_name, - "configuration": {"length_bits": self.length * self.item_size_bits}, - } - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=data["configuration"]["length_bits"] // cls.item_size_bits) # type: ignore[arg-type, index, call-overload, operator] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def default_value(self) -> np.bytes_: - return np.bytes_(b"") - - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: - if check_json_str(data): - return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii"))) - raise TypeError(f"Invalid type: {data}. 
Expected a string.") - - def check_value(self, data: object) -> bool: - return isinstance(data, np.bytes_ | str | bytes) - - def _cast_value_unsafe(self, value: object) -> np.bytes_: - return self.to_dtype().type(value) - - -@dataclass(frozen=True, kw_only=True) -class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength): - # np.dtypes.VoidDType is specified in an odd way in numpy - # it cannot be used to create instances of the dtype - # so we have to tell mypy to ignore this here - dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] - _zarr_v3_name = "numpy.void" - item_size_bits: ClassVar[int] = 8 - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) - - def to_dtype(self) -> np.dtypes.VoidDType[int]: - # Numpy does not allow creating a void type - # by invoking np.dtypes.VoidDType directly - return cast("np.dtypes.VoidDType[int]", np.dtype(f"V{self.length}")) - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - if zarr_format == 2: - # Check that the dtype is |V1, |V2, ... 
- return isinstance(data, str) and re.match(r"^\|V\d+$", data) is not None - elif zarr_format == 3: - return ( - isinstance(data, dict) - and "name" in data - and isinstance(data["name"], str) - and (re.match(r"^r\d+$", data["name"]) is not None) - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return {"name": f"r{self.length * self.item_size_bits}"} - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=int(data["name"][1:]) // cls.item_size_bits) # type: ignore[arg-type, index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[Any]]: - """ - Numpy void dtype comes in two forms: - * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. - * If the ``fields`` attribute is not ``None``, then the dtype represents a structured dtype, - - In this check we ensure that ``fields`` is ``None``. - - Parameters - ---------- - dtype : TDType - The dtype to check. - - Returns - ------- - Bool - True if the dtype matches, False otherwise. 
- """ - return cls.dtype_cls is type(dtype) and dtype.fields is None # type: ignore[has-type] - - def default_value(self) -> np.void: - return self.to_dtype().type(("\x00" * self.length).encode("ascii")) - - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(self.cast_value(data).tobytes()).decode("ascii") - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: - if check_json_str(data): - return self.to_dtype().type(base64.standard_b64decode(data)) - raise DataTypeValidationError(f"Invalid type: {data}. Expected a string.") - - def check_value(self, data: object) -> bool: - return isinstance(data, np.bytes_ | str | bytes | np.void) - - def _cast_value_unsafe(self, value: object) -> np.void: - return self.to_dtype().type(value) # type: ignore[call-overload, no-any-return] - - -@dataclass(frozen=True, kw_only=True) -class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength): - dtype_cls = np.dtypes.StrDType - _zarr_v3_name = "numpy.fixed_length_ucs4" - item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls( - length=dtype.itemsize // (cls.item_size_bits // 8), - endianness=endianness_from_numpy_str(byte_order), - ) - - def to_dtype(self) -> np.dtypes.StrDType[int]: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls(self.length).newbyteorder(byte_order) - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - """ - Check that the input is a valid JSON representation of a numpy S dtype. 
- """ - if zarr_format == 2: - # match >U1, <]U\d+$", data) is not None - elif zarr_format == 3: - return ( - isinstance(data, dict) - and "name" in data - and data["name"] == cls._zarr_v3_name - and "configuration" in data - and isinstance(data["configuration"], dict) - and "length_bits" in data["configuration"] - and isinstance(data["configuration"]["length_bits"], int) - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return { - "name": self._zarr_v3_name, - "configuration": {"length_bits": self.length * self.item_size_bits}, - } - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=data["configuration"]["length_bits"] // cls.item_size_bits) # type: ignore[arg-type, index, call-overload, operator] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def default_value(self) -> np.str_: - return np.str_("") - - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return str(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. 
Expected a string.") - return self.to_dtype().type(data) - - def check_value(self, data: object) -> bool: - return isinstance(data, str | np.str_ | bytes) - - def _cast_value_unsafe(self, value: object) -> np.str_: - return self.to_dtype().type(value) - - -_NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") - - -if _NUMPY_SUPPORTS_VLEN_STRING: - - @dataclass(frozen=True, kw_only=True) - class VariableLengthString(ZDType[np.dtypes.StringDType, str]): # type: ignore[type-var] - dtype_cls = np.dtypes.StringDType - _zarr_v3_name = "numpy.variable_length_utf8" - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - return cls() - - def to_dtype(self) -> np.dtypes.StringDType: - return self.dtype_cls() - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - """ - Check that the input is a valid JSON representation of a numpy string dtype. - """ - if zarr_format == 2: - # TODO: take the entire metadata document in here, and - # check the compressors / filters for vlen-utf8 - # Note that we are checking for the object dtype name. - return data == "|O" - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - # Note: unlike many other numpy data types, we don't serialize the .str attribute - # of the data type to JSON. 
This is because Zarr was using `|O` for strings before the - # numpy variable length string data type existed, and we want to be consistent with - # that practice - return "|O" - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - return cls() - - def default_value(self) -> str: - return "" - - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return str(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. Expected a string.") - return data - - def check_value(self, data: object) -> bool: - return isinstance(data, str) - - def _cast_value_unsafe(self, value: object) -> str: - return str(value) - -else: - # Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead. - @dataclass(frozen=True, kw_only=True) - class VariableLengthString(ZDType[np.dtypes.ObjectDType, str]): # type: ignore[no-redef] - dtype_cls = np.dtypes.ObjectDType - _zarr_v3_name = "numpy.variable_length_utf8" - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - return cls() - - def to_dtype(self) -> np.dtypes.ObjectDType: - return self.dtype_cls() - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - """ - Check that the input is a valid JSON representation of a numpy O dtype. 
- """ - if zarr_format == 2: - # TODO: take the entire metadata document in here, and - # check the compressors / filters for vlen-utf8 - return data == "|O" - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - return cls() - - def default_value(self) -> str: - return "" - - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return data # type: ignore[return-value] - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: - """ - Strings pass through - """ - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. 
Expected a string.") - return data - - def check_value(self, data: object) -> bool: - return isinstance(data, str) - - def _cast_value_unsafe(self, value: object) -> str: - return str(value) - - -DateUnit = Literal["Y", "M", "W", "D"] -TimeUnit = Literal["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] - - -@dataclass(frozen=True, kw_only=True, slots=True) -class DateTime64(ZDType[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): - dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] - _zarr_v3_name = "numpy.datetime64" - unit: DateUnit | TimeUnit - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - unit: DateUnit | TimeUnit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] # type: ignore[assignment] - if unit not in get_args(DateUnit) and unit not in get_args(TimeUnit): - raise DataTypeValidationError('Invalid unit for "numpy.datetime64"') - byteorder = cast("EndiannessNumpy", dtype.byteorder) - return cls(unit=unit, endianness=endianness_from_numpy_str(byteorder)) - - def to_dtype(self) -> np.dtypes.DateTime64DType: - # Numpy does not allow creating datetime64 via - # np.dtypes.DateTime64Dtype() - return cast( - "np.dtypes.DateTime64DType", - np.dtype(f"datetime64[{self.unit}]").newbyteorder( - endianness_to_numpy_str(self.endianness) - ), - ) - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - if zarr_format == 2: - # match M[M], etc - # consider making this a standalone function - return ( - isinstance(data, str) - and len(data) in (6, 7) - and data[0] in (">", "<") - and data[1:4] == "M8[" - and data[4:-1] in get_args(TimeUnit) + get_args(DateUnit) - and data[-1] == "]" - ) - elif zarr_format == 3: - return ( - isinstance(data, dict) - and "name" in data - and data["name"] == cls._zarr_v3_name - and "configuration" in data - and "unit" in data["configuration"] - and data["configuration"]["unit"] in get_args(DateUnit) + get_args(TimeUnit) - ) - raise 
ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def default_value(self) -> np.datetime64: - return np.datetime64("NaT") - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return {"name": self._zarr_v3_name, "configuration": {"unit": self.unit}} - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(unit=data["configuration"]["unit"]) # type: ignore[arg-type, index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: - if check_json_int(data): - return datetime_from_json(data, self.unit) - raise TypeError(f"Invalid type: {data}. Expected an integer.") - - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: - return datetime_to_json(data) # type: ignore[arg-type] - - def check_value(self, data: object) -> bool: - # not sure which values we should accept for structured dtypes. - try: - np.array([data], dtype=self.to_dtype()) - return True # noqa: TRY300 - except ValueError: - return False - - def _cast_value_unsafe(self, value: object) -> np.datetime64: - return self.to_dtype().type(value) # type: ignore[no-any-return, call-overload] - - -@dataclass(frozen=True, kw_only=True) -class Structured(ZDType[np.dtypes.VoidDType[int], np.void]): - dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] - _zarr_v3_name = "structured" - fields: tuple[tuple[str, ZDType[_BaseDType, _BaseScalar]], ...] 
- - def default_value(self) -> np.void: - return self._cast_value_unsafe(0) - - def _cast_value_unsafe(self, value: object) -> np.void: - return cast("np.void", np.array([value], dtype=self.to_dtype())[0]) - - @classmethod - def check_dtype(cls, dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: - """ - Check that this dtype is a numpy structured dtype - - Parameters - ---------- - dtype : np.dtypes.DTypeLike - The dtype to check. - - Returns - ------- - TypeGuard[np.dtypes.VoidDType] - True if the dtype matches, False otherwise. - """ - return super().check_dtype(dtype) and dtype.fields is not None - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - from zarr.core.dtype import get_data_type_from_native_dtype - - fields: list[tuple[str, ZDType[_BaseDType, _BaseScalar]]] = [] - - if dtype.fields is None: - raise ValueError("numpy dtype has no fields") - - # fields of a structured numpy dtype are either 2-tuples or 3-tuples. we only - # care about the first element in either case. 
- for key, (dtype_instance, *_) in dtype.fields.items(): - dtype_wrapped = get_data_type_from_native_dtype(dtype_instance) - fields.append((key, dtype_wrapped)) - - return cls(fields=tuple(fields)) - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - fields = [ - (f_name, f_dtype.to_json(zarr_format=zarr_format)) for f_name, f_dtype in self.fields - ] - if zarr_format == 2: - return fields - elif zarr_format == 3: - base_dict = {"name": self._zarr_v3_name} - base_dict["configuration"] = {"fields": fields} # type: ignore[assignment] - return cast("JSON", base_dict) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[dict[str, JSON] | list[Any]]: - # the actual JSON form is recursive and hard to annotate, so we give up and do - # list[Any] for now - if zarr_format == 2: - return ( - not isinstance(data, str) - and isinstance(data, Sequence) - and all( - not isinstance(field, str) and isinstance(field, Sequence) and len(field) == 2 - for field in data - ) - ) - elif zarr_format == 3: - return ( - isinstance(data, dict) - and "name" in data - and "configuration" in data - and isinstance(data["configuration"], dict) - and "fields" in data["configuration"] - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - from zarr.core.dtype import get_data_type_from_json - - if cls.check_json(data, zarr_format=zarr_format): - if zarr_format == 2: - # structured dtypes are constructed directly from a list of lists - return cls( - fields=tuple( # type: ignore[misc] - (f_name, get_data_type_from_json(f_dtype, zarr_format=zarr_format)) - for f_name, f_dtype in data - ) - ) - elif zarr_format == 3: # noqa: SIM102 - if isinstance(data, dict) and "configuration" in data: - config = data["configuration"] - if 
isinstance(config, dict) and "fields" in config: - meta_fields = config["fields"] - fields = tuple( - (f_name, get_data_type_from_json(f_dtype, zarr_format=zarr_format)) - for f_name, f_dtype in meta_fields - ) - return cls(fields=fields) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") - - def to_dtype(self) -> np.dtypes.VoidDType[int]: - return cast( - "np.dtypes.VoidDType[int]", - np.dtype([(key, dtype.to_dtype()) for (key, dtype) in self.fields]), - ) - - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return bytes_to_json(self.cast_value(data).tobytes(), zarr_format) - - def check_value(self, data: object) -> bool: - # not sure which values we should accept for structured dtypes. - try: - np.array([data], dtype=self.to_dtype()) - return True # noqa: TRY300 - except ValueError: - return False - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. Expected a string.") - as_bytes = bytes_from_json(data, zarr_format=zarr_format) - dtype = self.to_dtype() - return cast("np.void", np.array([as_bytes], dtype=dtype.str).view(dtype)[0]) - - -def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: - """ - Convert an endianness literal to its numpy string representation. - - Parameters - ---------- - endianness : Endianness or None - The endianness to convert. - - Returns - ------- - Literal[">", "<", "|"] - The numpy string representation of the endianness. - - Raises - ------ - ValueError - If the endianness is invalid. - """ - match endianness: - case "little": - return "<" - case "big": - return ">" - case None: - return "|" - raise ValueError( - f"Invalid endianness: {endianness}. 
Expected one of {get_args(Endianness)} or None" - ) - - -def endianness_from_numpy_str(endianness: EndiannessNumpy) -> Endianness | None: - """ - Convert a numpy endianness string literal to a human-readable literal value. - - Parameters - ---------- - endianness : Literal[">", "<", "=", "|"] - The numpy string representation of the endianness. - - Returns - ------- - Endianness or None - The human-readable representation of the endianness. - - Raises - ------ - ValueError - If the endianness is invalid. - """ - match endianness: - case "=": - return sys.byteorder - case "<": - return "little" - case ">": - return "big" - case "|": - return None - raise ValueError( - f"Invalid endianness: {endianness}. Expected one of {get_args(EndiannessNumpy)}" - ) diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 900b3fddbd..657f56bfb7 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -1,14 +1,7 @@ from __future__ import annotations -import base64 -from collections.abc import Sequence -from typing import TYPE_CHECKING, Any, Literal, TypeGuard, cast - -import numpy as np - -if TYPE_CHECKING: - from zarr.core.common import JSON, ZarrFormat - from zarr.core.dtype._numpy import DateUnit, TimeUnit +from dataclasses import dataclass +from typing import Literal Endianness = Literal["little", "big"] JSONFloat = float | Literal["NaN", "Infinity", "-Infinity"] @@ -17,504 +10,20 @@ class DataTypeValidationError(ValueError): ... -def check_json_bool(data: JSON) -> TypeGuard[bool]: - """ - Check if a JSON value is a boolean. - - Parameters - ---------- - data : JSON - The JSON value to check. - - Returns - ------- - Bool - True if the data is a boolean, False otherwise. - """ - return isinstance(data, bool) - - -def check_json_str(data: JSON) -> TypeGuard[str]: - """ - Check if a JSON value is a string. - - Parameters - ---------- - data : JSON - The JSON value to check. 
- - Returns - ------- - Bool - True if the data is a string, False otherwise. - """ - return bool(isinstance(data, str)) - - -def check_json_int(data: JSON) -> TypeGuard[int]: - """ - Check if a JSON value is an integer. - - Parameters - ---------- - data : JSON - The JSON value to check. - - Returns - ------- - Bool - True if the data is an integer, False otherwise. - """ - return bool(isinstance(data, int)) - - -def check_json_float_v2(data: JSON) -> TypeGuard[JSONFloat]: - """ - Check if a JSON value represents a float (v2). - - Parameters - ---------- - data : JSON - The JSON value to check. - - Returns - ------- - Bool - True if the data is a float, False otherwise. - """ - if data == "NaN" or data == "Infinity" or data == "-Infinity": - return True - return isinstance(data, float | int) - - -def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloat]: - """ - Check if a JSON value represents a float (v3). - - Parameters - ---------- - data : JSON - The JSON value to check. - - Returns - ------- - Bool - True if the data is a float, False otherwise. - """ - # TODO: handle the special JSON serialization of different NaN values - return check_json_float_v2(data) - - -def check_json_float(data: JSON, zarr_format: ZarrFormat) -> TypeGuard[float]: - """ - Check if a JSON value represents a float based on zarr format. - - Parameters - ---------- - data : JSON - The JSON value to check. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - Bool - True if the data is a float, False otherwise. - """ - if zarr_format == 2: - return check_json_float_v2(data) - else: - return check_json_float_v3(data) - - -def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: - """ - Check if a JSON value represents a complex float, as per the zarr v3 spec - - Parameters - ---------- - data : JSON - The JSON value to check. - - Returns - ------- - Bool - True if the data is a complex float, False otherwise. 
- """ - return ( - not isinstance(data, str) - and isinstance(data, Sequence) - and len(data) == 2 - and check_json_float_v3(data[0]) - and check_json_float_v3(data[1]) - ) - - -def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: - """ - Check if a JSON value represents a complex float, as per the behavior of zarr-python 2.x - - Parameters - ---------- - data : JSON - The JSON value to check. - - Returns - ------- - Bool - True if the data is a complex float, False otherwise. - """ - return ( - not isinstance(data, str) - and isinstance(data, Sequence) - and len(data) == 2 - and check_json_float_v2(data[0]) - and check_json_float_v2(data[1]) - ) - - -def check_json_complex_float( - data: JSON, zarr_format: ZarrFormat -) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: - """ - Check if a JSON value represents a complex float based on zarr format. - - Parameters - ---------- - data : JSON - The JSON value to check. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - Bool - True if the data represents a complex float, False otherwise. - """ - if zarr_format == 2: - return check_json_complex_float_v2(data) - return check_json_complex_float_v3(data) - - -def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: - """ - Convert a float to JSON (v2). - - Parameters - ---------- - data : float or np.floating - The float value to convert. - - Returns - ------- - JSONFloat - The JSON representation of the float. - """ - if np.isnan(data): - return "NaN" - elif np.isinf(data): - return "Infinity" if data > 0 else "-Infinity" - return float(data) - - -def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: - """ - Convert a float to JSON (v3). - - Parameters - ---------- - data : float or np.floating - The float value to convert. - - Returns - ------- - JSONFloat - The JSON representation of the float. 
- """ - # v3 can in principle handle distinct NaN values, but numpy does not represent these explicitly - # so we just reuse the v2 routine here - return float_to_json_v2(data) - - -def float_to_json(data: float | np.floating[Any], zarr_format: ZarrFormat) -> JSONFloat: - """ - Convert a float to JSON, parametrized by the zarr format version. - - Parameters - ---------- - data : float or np.floating - The float value to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - JSONFloat - The JSON representation of the float. +@dataclass(frozen=True) +class HasLength: """ - if zarr_format == 2: - return float_to_json_v2(data) - else: - return float_to_json_v3(data) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") - - -def complex_to_json_v2(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: - """ - Convert a complex number to JSON (v2). - - Parameters - ---------- - data : complex or np.complexfloating - The complex value to convert. - - Returns - ------- - tuple[JSONFloat, JSONFloat] - The JSON representation of the complex number. - """ - return float_to_json_v2(data.real), float_to_json_v2(data.imag) - - -def complex_to_json_v3(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: - """ - Convert a complex number to JSON (v3). - - Parameters - ---------- - data : complex or np.complexfloating - The complex value to convert. - - Returns - ------- - tuple[JSONFloat, JSONFloat] - The JSON representation of the complex number. - """ - return float_to_json_v3(data.real), float_to_json_v3(data.imag) - - -def complex_float_to_json( - data: complex | np.complexfloating[Any, Any], zarr_format: ZarrFormat -) -> tuple[JSONFloat, JSONFloat]: - """ - Convert a complex number to JSON, parametrized by the zarr format version. - - Parameters - ---------- - data : complex or np.complexfloating - The complex value to convert. 
- zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - tuple[JSONFloat, JSONFloat] or JSONFloat - The JSON representation of the complex number. - """ - if zarr_format == 2: - return complex_to_json_v2(data) - else: - return complex_to_json_v3(data) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") - - -def bytes_to_json(data: bytes, zarr_format: ZarrFormat) -> str: - """ - Convert bytes to JSON. - - Parameters - ---------- - data : bytes - The bytes to store. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - str - The bytes encoded as ascii using the base64 alphabet. - """ - # TODO: decide if we are going to make this implementation zarr format-specific - return base64.b64encode(data).decode("ascii") - - -def bytes_from_json(data: str, zarr_format: ZarrFormat) -> bytes: - """ - Convert a JSON string to bytes - - Parameters - ---------- - data : str - The JSON string to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - bytes - The bytes. + A mix-in class for data types with a length attribute, such as fixed-size collections + of unicode strings, or bytes. """ - if zarr_format == 2: - return base64.b64decode(data.encode("ascii")) - # TODO: differentiate these as needed. This is a spec question. - if zarr_format == 3: - return base64.b64decode(data.encode("ascii")) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + length: int -def float_from_json_v2(data: JSONFloat) -> float: - """ - Convert a JSON float to a float (Zarr v2). - Parameters - ---------- - data : JSONFloat - The JSON float to convert. - - Returns - ------- - float - The float value. 
+@dataclass(frozen=True) +class HasEndianness: """ - match data: - case "NaN": - return float("nan") - case "Infinity": - return float("inf") - case "-Infinity": - return float("-inf") - case _: - return float(data) - - -def float_from_json_v3(data: JSONFloat) -> float: - """ - Convert a JSON float to a float (v3). - - Parameters - ---------- - data : JSONFloat - The JSON float to convert. - - Returns - ------- - float - The float value. + A mix-in class for data types with an endianness attribute """ - # todo: support the v3-specific NaN handling - return float_from_json_v2(data) - -def float_from_json(data: JSONFloat, zarr_format: ZarrFormat) -> float: - """ - Convert a JSON float to a float based on zarr format. - - Parameters - ---------- - data : JSONFloat - The JSON float to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - float - The float value. - """ - if zarr_format == 2: - return float_from_json_v2(data) - else: - return float_from_json_v3(data) - - -def complex_float_from_json_v2(data: tuple[JSONFloat, JSONFloat]) -> complex: - """ - Convert a JSON complex float to a complex number (v2). - - Parameters - ---------- - data : tuple[JSONFloat, JSONFloat] - The JSON complex float to convert. - - Returns - ------- - np.complexfloating - The complex number. - """ - return complex(float_from_json_v2(data[0]), float_from_json_v2(data[1])) - - -def complex_float_from_json_v3(data: tuple[JSONFloat, JSONFloat]) -> complex: - """ - Convert a JSON complex float to a complex number (v3). - - Parameters - ---------- - data : tuple[JSONFloat, JSONFloat] - The JSON complex float to convert. - - Returns - ------- - np.complexfloating - The complex number. - """ - return complex(float_from_json_v3(data[0]), float_from_json_v3(data[1])) - - -def complex_float_from_json(data: tuple[JSONFloat, JSONFloat], zarr_format: ZarrFormat) -> complex: - """ - Convert a JSON complex float to a complex number based on zarr format. 
- - Parameters - ---------- - data : tuple[JSONFloat, JSONFloat] - The JSON complex float to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - np.complexfloating - The complex number. - """ - if zarr_format == 2: - return complex_float_from_json_v2(data) - else: - return complex_float_from_json_v3(data) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") - - -def datetime_to_json(data: np.datetime64) -> int: - """ - Convert a datetime64 to a JSON integer. - - Parameters - ---------- - data : np.datetime64 - The datetime64 value to convert. - - Returns - ------- - int - The JSON representation of the datetime64. - """ - return data.view(np.int64).item() - - -def datetime_from_json(data: int, unit: DateUnit | TimeUnit) -> np.datetime64: - """ - Convert a JSON integer to a datetime64. - - Parameters - ---------- - data : int - The JSON integer to convert. - unit : DateUnit or TimeUnit - The unit of the datetime64. - - Returns - ------- - np.datetime64 - The datetime64 value. - """ - return cast("np.datetime64", np.int64(data).view(f"datetime64[{unit}]")) + endianness: Endianness | None = "little" diff --git a/src/zarr/core/dtype/npy/__init__.py b/src/zarr/core/dtype/npy/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py new file mode 100644 index 0000000000..293d8383c0 --- /dev/null +++ b/src/zarr/core/dtype/npy/bool.py @@ -0,0 +1,114 @@ +from dataclasses import dataclass +from typing import ClassVar, Literal, Self, TypeGuard + +import numpy as np + +from zarr.core.common import JSON, ZarrFormat +from zarr.core.dtype.npy.common import check_json_bool +from zarr.core.dtype.wrapper import ZDType, _BaseDType + + +@dataclass(frozen=True, kw_only=True, slots=True) +class Bool(ZDType[np.dtypes.BoolDType, np.bool_]): + """ + Wrapper for numpy boolean dtype. + + Attributes + ---------- + name : str + The name of the dtype. 
+ dtype_cls : ClassVar[type[np.dtypes.BoolDType]] + The numpy dtype class. + """ + + _zarr_v3_name = "bool" + _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|b1",) + dtype_cls = np.dtypes.BoolDType + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + return cls() + + def to_dtype(self: Self) -> np.dtypes.BoolDType: + return self.dtype_cls() + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["bool", "|b1"]]: + """ + Check that the input is a valid JSON representation of a bool. + """ + if zarr_format == 2: + return data in cls._zarr_v2_names + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def to_json(self, zarr_format: ZarrFormat) -> str: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() + + def default_value(self) -> np.bool_: + """ + Get the default value for the boolean dtype. + + Returns + ------- + np.bool_ + The default value. + """ + return np.False_ + + def to_json_value(self, data: object, zarr_format: ZarrFormat) -> bool: + """ + Convert a scalar to a python bool. + + Parameters + ---------- + data : object + The value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + bool + The JSON-serializable format. + """ + return bool(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: + """ + Read a JSON-serializable value as a numpy boolean scalar. + + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + np.bool_ + The numpy boolean scalar. 
+ """ + if check_json_bool(data): + return self._cast_value_unsafe(data) + raise TypeError(f"Invalid type: {data}. Expected a boolean.") + + def check_value(self, data: object) -> bool: + # Anything can become a bool + return True + + def cast_value(self, value: object) -> np.bool_: + return self._cast_value_unsafe(value) + + def _cast_value_unsafe(self, value: object) -> np.bool_: + return np.bool_(value) diff --git a/src/zarr/core/dtype/npy/common.py b/src/zarr/core/dtype/npy/common.py new file mode 100644 index 0000000000..6571002bbb --- /dev/null +++ b/src/zarr/core/dtype/npy/common.py @@ -0,0 +1,578 @@ +from __future__ import annotations + +import base64 +import sys +from collections.abc import Sequence +from typing import ( + TYPE_CHECKING, + Any, + Literal, + SupportsComplex, + SupportsFloat, + SupportsIndex, + SupportsInt, + TypeGuard, + TypeVar, + get_args, +) + +import numpy as np + +from zarr.core.dtype.common import Endianness, JSONFloat + +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + +IntLike = SupportsInt | SupportsIndex | bytes | str +FloatLike = SupportsIndex | SupportsFloat | bytes | str +ComplexLike = SupportsFloat | SupportsIndex | SupportsComplex | bytes | str | None +DateUnit = Literal["Y", "M", "W", "D"] +TimeUnit = Literal["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] +EndiannessNumpy = Literal[">", "<", "|", "="] + +TFloatDType_co = TypeVar( + "TFloatDType_co", + bound=np.dtypes.Float16DType | np.dtypes.Float32DType | np.dtypes.Float64DType, + covariant=True, +) +TFloatScalar_co = TypeVar( + "TFloatScalar_co", bound=np.float16 | np.float32 | np.float64, covariant=True +) + +TComplexDType_co = TypeVar( + "TComplexDType_co", bound=np.dtypes.Complex64DType | np.dtypes.Complex128DType, covariant=True +) +TComplexScalar_co = TypeVar("TComplexScalar_co", bound=np.complex64 | np.complex128, covariant=True) + + +def endianness_from_numpy_str(endianness: EndiannessNumpy) -> Endianness | None: + """ + Convert a numpy 
endianness string literal to a human-readable literal value. + + Parameters + ---------- + endianness : Literal[">", "<", "=", "|"] + The numpy string representation of the endianness. + + Returns + ------- + Endianness or None + The human-readable representation of the endianness. + + Raises + ------ + ValueError + If the endianness is invalid. + """ + match endianness: + case "=": + # Use the local system endianness + return sys.byteorder + case "<": + return "little" + case ">": + return "big" + case "|": + # for dtypes without byte ordering semantics + return None + raise ValueError( + f"Invalid endianness: {endianness}. Expected one of {get_args(EndiannessNumpy)}" + ) + + +def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: + """ + Convert an endianness literal to its numpy string representation. + + Parameters + ---------- + endianness : Endianness or None + The endianness to convert. + + Returns + ------- + Literal[">", "<", "|"] + The numpy string representation of the endianness. + + Raises + ------ + ValueError + If the endianness is invalid. + """ + match endianness: + case "little": + return "<" + case "big": + return ">" + case None: + return "|" + raise ValueError( + f"Invalid endianness: {endianness}. Expected one of {get_args(Endianness)} or None" + ) + + +def float_from_json_v2(data: JSONFloat) -> float: + """ + Convert a JSON float to a float (Zarr v2). + + Parameters + ---------- + data : JSONFloat + The JSON float to convert. + + Returns + ------- + float + The float value. + """ + match data: + case "NaN": + return float("nan") + case "Infinity": + return float("inf") + case "-Infinity": + return float("-inf") + case _: + return float(data) + + +def float_from_json_v3(data: JSONFloat) -> float: + """ + Convert a JSON float to a float (v3). + + Parameters + ---------- + data : JSONFloat + The JSON float to convert. + + Returns + ------- + float + The float value. 
+ """ + # todo: support the v3-specific NaN handling + return float_from_json_v2(data) + + +def float_from_json(data: JSONFloat, zarr_format: ZarrFormat) -> float: + """ + Convert a JSON float to a float based on zarr format. + + Parameters + ---------- + data : JSONFloat + The JSON float to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + float + The float value. + """ + if zarr_format == 2: + return float_from_json_v2(data) + else: + return float_from_json_v3(data) + + +def bytes_from_json(data: str, zarr_format: ZarrFormat) -> bytes: + """ + Convert a JSON string to bytes + + Parameters + ---------- + data : str + The JSON string to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + bytes + The bytes. + """ + if zarr_format == 2: + return base64.b64decode(data.encode("ascii")) + # TODO: differentiate these as needed. This is a spec question. + if zarr_format == 3: + return base64.b64decode(data.encode("ascii")) + raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + + +def bytes_to_json(data: bytes, zarr_format: ZarrFormat) -> str: + """ + Convert bytes to JSON. + + Parameters + ---------- + data : bytes + The bytes to store. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + str + The bytes encoded as ascii using the base64 alphabet. + """ + # TODO: decide if we are going to make this implementation zarr format-specific + return base64.b64encode(data).decode("ascii") + + +def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: + """ + Convert a float to JSON (v2). + + Parameters + ---------- + data : float or np.floating + The float value to convert. + + Returns + ------- + JSONFloat + The JSON representation of the float. 
+ """ + if np.isnan(data): + return "NaN" + elif np.isinf(data): + return "Infinity" if data > 0 else "-Infinity" + return float(data) + + +def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: + """ + Convert a float to JSON (v3). + + Parameters + ---------- + data : float or np.floating + The float value to convert. + + Returns + ------- + JSONFloat + The JSON representation of the float. + """ + # v3 can in principle handle distinct NaN values, but numpy does not represent these explicitly + # so we just reuse the v2 routine here + return float_to_json_v2(data) + + +def complex_to_json_v3(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: + """ + Convert a complex number to JSON (v3). + + Parameters + ---------- + data : complex or np.complexfloating + The complex value to convert. + + Returns + ------- + tuple[JSONFloat, JSONFloat] + The JSON representation of the complex number. + """ + return float_to_json_v3(data.real), float_to_json_v3(data.imag) + + +def complex_to_json_v2(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: + """ + Convert a complex number to JSON (v2). + + Parameters + ---------- + data : complex or np.complexfloating + The complex value to convert. + + Returns + ------- + tuple[JSONFloat, JSONFloat] + The JSON representation of the complex number. + """ + return float_to_json_v2(data.real), float_to_json_v2(data.imag) + + +def complex_float_to_json( + data: complex | np.complexfloating[Any, Any], zarr_format: ZarrFormat +) -> tuple[JSONFloat, JSONFloat]: + """ + Convert a complex number to JSON, parametrized by the zarr format version. + + Parameters + ---------- + data : complex or np.complexfloating + The complex value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + tuple[JSONFloat, JSONFloat] or JSONFloat + The JSON representation of the complex number. 
+ """ + if zarr_format == 2: + return complex_to_json_v2(data) + else: + return complex_to_json_v3(data) + raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + + +def float_to_json(data: float | np.floating[Any], zarr_format: ZarrFormat) -> JSONFloat: + """ + Convert a float to JSON, parametrized by the zarr format version. + + Parameters + ---------- + data : float or np.floating + The float value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + JSONFloat + The JSON representation of the float. + """ + if zarr_format == 2: + return float_to_json_v2(data) + else: + return float_to_json_v3(data) + raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + + +def check_json_float_v2(data: JSON) -> TypeGuard[JSONFloat]: + """ + Check if a JSON value represents a float (v2). + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a float, False otherwise. + """ + if data == "NaN" or data == "Infinity" or data == "-Infinity": + return True + return isinstance(data, float | int) + + +def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + """ + Check if a JSON value represents a complex float, as per the behavior of zarr-python 2.x + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a complex float, False otherwise. + """ + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and len(data) == 2 + and check_json_float_v2(data[0]) + and check_json_float_v2(data[1]) + ) + + +def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloat]: + """ + Check if a JSON value represents a float (v3). + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a float, False otherwise. 
+ """ + # TODO: handle the special JSON serialization of different NaN values + return check_json_float_v2(data) + + +def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + """ + Check if a JSON value represents a complex float, as per the zarr v3 spec + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a complex float, False otherwise. + """ + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and len(data) == 2 + and check_json_float_v3(data[0]) + and check_json_float_v3(data[1]) + ) + + +def check_json_complex_float( + data: JSON, zarr_format: ZarrFormat +) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: + """ + Check if a JSON value represents a complex float based on zarr format. + + Parameters + ---------- + data : JSON + The JSON value to check. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + Bool + True if the data represents a complex float, False otherwise. + """ + if zarr_format == 2: + return check_json_complex_float_v2(data) + return check_json_complex_float_v3(data) + + +def check_json_float(data: JSON, zarr_format: ZarrFormat) -> TypeGuard[float]: + """ + Check if a JSON value represents a float based on zarr format. + + Parameters + ---------- + data : JSON + The JSON value to check. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + Bool + True if the data is a float, False otherwise. + """ + if zarr_format == 2: + return check_json_float_v2(data) + else: + return check_json_float_v3(data) + + +def check_json_int(data: JSON) -> TypeGuard[int]: + """ + Check if a JSON value is an integer. + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is an integer, False otherwise. + """ + return bool(isinstance(data, int)) + + +def check_json_str(data: JSON) -> TypeGuard[str]: + """ + Check if a JSON value is a string. 
+ + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a string, False otherwise. + """ + return bool(isinstance(data, str)) + + +def check_json_bool(data: JSON) -> TypeGuard[bool]: + """ + Check if a JSON value is a boolean. + + Parameters + ---------- + data : JSON + The JSON value to check. + + Returns + ------- + Bool + True if the data is a boolean, False otherwise. + """ + return isinstance(data, bool) + + +def complex_float_from_json_v2(data: tuple[JSONFloat, JSONFloat]) -> complex: + """ + Convert a JSON complex float to a complex number (v2). + + Parameters + ---------- + data : tuple[JSONFloat, JSONFloat] + The JSON complex float to convert. + + Returns + ------- + np.complexfloating + The complex number. + """ + return complex(float_from_json_v2(data[0]), float_from_json_v2(data[1])) + + +def complex_float_from_json_v3(data: tuple[JSONFloat, JSONFloat]) -> complex: + """ + Convert a JSON complex float to a complex number (v3). + + Parameters + ---------- + data : tuple[JSONFloat, JSONFloat] + The JSON complex float to convert. + + Returns + ------- + np.complexfloating + The complex number. + """ + return complex(float_from_json_v3(data[0]), float_from_json_v3(data[1])) + + +def complex_float_from_json(data: tuple[JSONFloat, JSONFloat], zarr_format: ZarrFormat) -> complex: + """ + Convert a JSON complex float to a complex number based on zarr format. + + Parameters + ---------- + data : tuple[JSONFloat, JSONFloat] + The JSON complex float to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + np.complexfloating + The complex number. + """ + if zarr_format == 2: + return complex_float_from_json_v2(data) + else: + return complex_float_from_json_v3(data) + raise ValueError(f"Invalid zarr format: {zarr_format}. 
Expected 2 or 3.") diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py new file mode 100644 index 0000000000..22e1bd66a3 --- /dev/null +++ b/src/zarr/core/dtype/npy/complex.py @@ -0,0 +1,155 @@ +from dataclasses import dataclass +from typing import ( + TYPE_CHECKING, + ClassVar, + Self, + TypeGuard, + cast, +) + +import numpy as np + +from zarr.core.common import JSON, ZarrFormat +from zarr.core.dtype.common import HasEndianness +from zarr.core.dtype.npy.common import ( + ComplexLike, + TComplexDType_co, + TComplexScalar_co, + check_json_complex_float, + complex_float_from_json, + complex_float_to_json, + endianness_from_numpy_str, + endianness_to_numpy_str, +) +from zarr.core.dtype.wrapper import ZDType, _BaseDType + +if TYPE_CHECKING: + from zarr.core.dtype.npy.common import EndiannessNumpy + + +@dataclass(frozen=True) +class BaseComplex(ZDType[TComplexDType_co, TComplexScalar_co], HasEndianness): + # This attribute holds the possible zarr v2 JSON names for the data type + _zarr_v2_names: ClassVar[tuple[str, ...]] + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) + + def to_dtype(self) -> TComplexDType_co: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] + + def to_json(self, zarr_format: ZarrFormat) -> str: + """ + Convert the wrapped data type to a JSON-serializable form. + + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. 
+ + Returns + ------- + str + The JSON-serializable representation of the wrapped data type + """ + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of this data type. + """ + if zarr_format == 2: + return data in cls._zarr_v2_names + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def check_value(self, value: object) -> bool: + return isinstance(value, ComplexLike) + + def _cast_value_unsafe(self, value: object) -> TComplexScalar_co: + if self.check_value(value): + return self.to_dtype().type(value) # type: ignore[arg-type, return-value] + raise TypeError(f"Invalid type: {value}. Expected a value castable to a complex scalar.") + + def default_value(self) -> TComplexScalar_co: + """ + Get the default value, which is 0 cast to this dtype + + Returns + ------- + Int scalar + The default value. + """ + return self._cast_value_unsafe(0) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TComplexScalar_co: + """ + Read a JSON-serializable value as a numpy float. + + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + TScalar_co + The numpy float. 
+ """ + if check_json_complex_float(data, zarr_format=zarr_format): + return self._cast_value_unsafe(complex_float_from_json(data, zarr_format=zarr_format)) + raise TypeError( + f"Invalid type: {data}. Expected a float or a special string encoding of a float." + ) + + def to_json_value(self, data: object, zarr_format: ZarrFormat) -> JSON: + """ + Convert an object to a JSON-serializable float. + + Parameters + ---------- + data : _BaseScalar + The value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + JSON + The JSON-serializable form of the complex number, which is a list of two floats, + each of which is encoding according to a zarr-format-specific encoding. + """ + return complex_float_to_json(self.cast_value(data), zarr_format=zarr_format) + + +@dataclass(frozen=True, kw_only=True) +class Complex64(BaseComplex[np.dtypes.Complex64DType, np.complex64]): + dtype_cls = np.dtypes.Complex64DType + _zarr_v3_name = "complex64" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c8", "c16", " Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) + + def to_dtype(self) -> TFloatDType_co: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] + + def to_json(self, zarr_format: ZarrFormat) -> str: + """ + Convert the wrapped data type to a JSON-serializable form. + + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. 
+ + Returns + ------- + str + The JSON-serializable representation of the wrapped data type + """ + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of this data type. + """ + if zarr_format == 2: + return data in cls._zarr_v2_names + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def check_value(self, value: object) -> TypeGuard[FloatLike]: + return isinstance(value, FloatLike) + + def _cast_value_unsafe(self, value: object) -> TFloatScalar_co: + if self.check_value(value): + return self.to_dtype().type(value) # type: ignore[return-value] + raise TypeError(f"Invalid type: {value}. Expected a value castable to a float.") + + def default_value(self) -> TFloatScalar_co: + """ + Get the default value, which is 0 cast to this dtype + + Returns + ------- + Int scalar + The default value. + """ + return self._cast_value_unsafe(0) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TFloatScalar_co: + """ + Read a JSON-serializable value as a numpy float. + + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + TScalar_co + The numpy float. 
+ """ + if check_json_float(data, zarr_format=zarr_format): + return self._cast_value_unsafe(float_from_json(data, zarr_format=zarr_format)) + raise TypeError( + f"Invalid type: {data}. Expected a float or a special string encoding of a float." + ) + + def to_json_value(self, data: object, zarr_format: ZarrFormat) -> float | str: + """ + Convert an object to a JSON-serializable float. + + Parameters + ---------- + data : _BaseScalar + The value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + JSON + The JSON-serializable form of the float, which is potentially a number or a string. + See the zarr specifications for details on the JSON encoding for floats. + """ + return float_to_json(self._cast_value_unsafe(data), zarr_format=zarr_format) + + +@dataclass(frozen=True, kw_only=True) +class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): + dtype_cls = np.dtypes.Float16DType + _zarr_v3_name = "float16" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f2", "f4", "f8", " str: + """ + Convert the wrapped data type to a JSON-serializable form. + + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + str + The JSON-serializable representation of the wrapped data type + """ + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of this data type. 
+ """ + if zarr_format == 2: + return data in cls._zarr_v2_names + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def check_value(self, value: object) -> TypeGuard[IntLike]: + return isinstance(value, IntLike) + + def _cast_value_unsafe(self, value: object) -> TIntScalar_co: + if self.check_value(value): + return self.to_dtype().type(value) # type: ignore[return-value] + raise TypeError(f"Invalid type: {value}. Expected a value castable to an integer.") + + def default_value(self) -> TIntScalar_co: + """ + Get the default value, which is 0 cast to this dtype + + Returns + ------- + Int scalar + The default value. + """ + return self._cast_value_unsafe(0) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TIntScalar_co: + """ + Read a JSON-serializable value as a numpy int scalar. + + Parameters + ---------- + data : JSON + The JSON-serializable value. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + TScalar_co + The numpy scalar. + """ + if check_json_int(data): + return self._cast_value_unsafe(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + + def to_json_value(self, data: object, zarr_format: ZarrFormat) -> int: + """ + Convert an object to JSON-serializable scalar. + + Parameters + ---------- + data : _BaseScalar + The value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + int + The JSON-serializable form of the scalar. 
+ """ + return int(self.cast_value(data)) + + +@dataclass(frozen=True, kw_only=True) +class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): + dtype_cls = np.dtypes.Int8DType + _zarr_v3_name = "int8" + _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|i1",) + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + return cls() + + def to_dtype(self: Self) -> np.dtypes.Int8DType: + return self.dtype_cls() + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() + + +@dataclass(frozen=True, kw_only=True) +class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): + dtype_cls = np.dtypes.UInt8DType + _zarr_v3_name = "uint8" + _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|u1",) + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + return cls() + + def to_dtype(self: Self) -> np.dtypes.UInt8DType: + return self.dtype_cls() + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() + + +@dataclass(frozen=True, kw_only=True) +class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): + dtype_cls = np.dtypes.Int16DType + _zarr_v3_name = "int16" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i2", " Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) + + def to_dtype(self) -> np.dtypes.Int16DType: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + # This ensures that we get the endianness correct without annoying string parsing + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + +@dataclass(frozen=True, kw_only=True) +class 
UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): + dtype_cls = np.dtypes.UInt16DType + _zarr_v3_name = "uint16" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u2", "<u2") + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) + + def to_dtype(self) -> np.dtypes.UInt16DType: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + +@dataclass(frozen=True, kw_only=True) +class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): + dtype_cls = np.dtypes.Int32DType + _zarr_v3_name = "int32" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", "<i4") + + @classmethod + def from_dtype(cls, dtype: _BaseDType) -> Self: + # We override the base implementation to address a windows-specific, pre-numpy 2 issue where + # ``np.dtype('i')`` is an instance of ``np.dtypes.IntDType`` that acts like `int32` instead of ``np.dtype('int32')`` + # In this case, ``type(np.dtype('i')) == np.dtypes.Int32DType`` will evaluate to ``True``, + # despite the two classes being different.
Thus we will create an instance of `cls` with the + # latter dtype, after pulling in the byte order of the input + if dtype == np.dtypes.Int32DType(): + return cls._from_dtype_unsafe(np.dtypes.Int32DType().newbyteorder(dtype.byteorder)) + else: + return super().from_dtype(dtype) + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) + + def to_dtype(self) -> np.dtypes.Int32DType: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + +@dataclass(frozen=True, kw_only=True) +class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): + dtype_cls = np.dtypes.UInt32DType + _zarr_v3_name = "uint32" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u4", "<u4") + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) + + def to_dtype(self) -> np.dtypes.UInt32DType: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + +@dataclass(frozen=True, kw_only=True) +class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): + dtype_cls = np.dtypes.Int64DType + _zarr_v3_name = "int64" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i8", "<i8") + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: +
byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) + + def to_dtype(self) -> np.dtypes.Int64DType: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + +@dataclass(frozen=True, kw_only=True) +class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): + dtype_cls = np.dtypes.UInt64DType + _zarr_v3_name = "uint64" + _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u8", " Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls(endianness=endianness_from_numpy_str(byte_order)) + + def to_dtype(self) -> np.dtypes.UInt64DType: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls() + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py new file mode 100644 index 0000000000..8d8ff57800 --- /dev/null +++ b/src/zarr/core/dtype/npy/sized.py @@ -0,0 +1,382 @@ +import base64 +import re +from collections.abc import Sequence +from dataclasses import dataclass +from typing import Any, ClassVar, Self, TypeGuard, cast + +import numpy as np + +from zarr.core.common import JSON, ZarrFormat +from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasLength +from zarr.core.dtype.npy.common import ( + EndiannessNumpy, + bytes_from_json, 
+ bytes_to_json, + check_json_str, + endianness_from_numpy_str, + endianness_to_numpy_str, +) +from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + + +@dataclass(frozen=True, kw_only=True) +class FixedLengthAscii(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength): + dtype_cls = np.dtypes.BytesDType + _zarr_v3_name = "numpy.fixed_length_ascii" + item_size_bits: ClassVar[int] = 8 + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) + + def to_dtype(self) -> np.dtypes.BytesDType[int]: + return self.dtype_cls(self.length) + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy S dtype. + """ + if zarr_format == 2: + # match |S1, |S2, etc + return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and data["name"] == cls._zarr_v3_name + and "configuration" in data + and isinstance(data["configuration"], dict) + and "length_bits" in data["configuration"] + and isinstance(data["configuration"]["length_bits"], int) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return { + "name": self._zarr_v3_name, + "configuration": {"length_bits": self.length * self.item_size_bits}, + } + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=data["configuration"]["length_bits"] // cls.item_size_bits) # type: ignore[arg-type, index, 
call-overload, operator] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def default_value(self) -> np.bytes_: + return np.bytes_(b"") + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: + if check_json_str(data): + return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii"))) + raise TypeError(f"Invalid type: {data}. Expected a string.") + + def check_value(self, data: object) -> bool: + return isinstance(data, np.bytes_ | str | bytes) + + def _cast_value_unsafe(self, value: object) -> np.bytes_: + return self.to_dtype().type(value) + + +@dataclass(frozen=True, kw_only=True) +class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength): + # np.dtypes.VoidDType is specified in an odd way in numpy + # it cannot be used to create instances of the dtype + # so we have to tell mypy to ignore this here + dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] + _zarr_v3_name = "numpy.void" + item_size_bits: ClassVar[int] = 8 + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) + + def to_dtype(self) -> np.dtypes.VoidDType[int]: + # Numpy does not allow creating a void type + # by invoking np.dtypes.VoidDType directly + return cast("np.dtypes.VoidDType[int]", np.dtype(f"V{self.length}")) + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + if zarr_format == 2: + # Check that the dtype is |V1, |V2, ... 
+ return isinstance(data, str) and re.match(r"^\|V\d+$", data) is not None + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and isinstance(data["name"], str) + and (re.match(r"^r\d+$", data["name"]) is not None) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return {"name": f"r{self.length * self.item_size_bits}"} + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=int(data["name"][1:]) // cls.item_size_bits) # type: ignore[arg-type, index, call-overload] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[Any]]: + """ + Numpy void dtype comes in two forms: + * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. + * If the ``fields`` attribute is not ``None``, then the dtype represents a structured dtype, + + In this check we ensure that ``fields`` is ``None``. + + Parameters + ---------- + dtype : TDType + The dtype to check. + + Returns + ------- + Bool + True if the dtype matches, False otherwise. 
+ """ + return cls.dtype_cls is type(dtype) and dtype.fields is None # type: ignore[has-type] + + def default_value(self) -> np.void: + return self.to_dtype().type(("\x00" * self.length).encode("ascii")) + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(self.cast_value(data).tobytes()).decode("ascii") + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: + if check_json_str(data): + return self.to_dtype().type(base64.standard_b64decode(data)) + raise DataTypeValidationError(f"Invalid type: {data}. Expected a string.") + + def check_value(self, data: object) -> bool: + return isinstance(data, np.bytes_ | str | bytes | np.void) + + def _cast_value_unsafe(self, value: object) -> np.void: + return self.to_dtype().type(value) # type: ignore[call-overload, no-any-return] + + +@dataclass(frozen=True, kw_only=True) +class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength): + dtype_cls = np.dtypes.StrDType + _zarr_v3_name = "numpy.fixed_length_ucs4" + item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls( + length=dtype.itemsize // (cls.item_size_bits // 8), + endianness=endianness_from_numpy_str(byte_order), + ) + + def to_dtype(self) -> np.dtypes.StrDType[int]: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls(self.length).newbyteorder(byte_order) + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy S dtype. 
+ """ + if zarr_format == 2: + # match >U1, <]U\d+$", data) is not None + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and data["name"] == cls._zarr_v3_name + and "configuration" in data + and isinstance(data["configuration"], dict) + and "length_bits" in data["configuration"] + and isinstance(data["configuration"]["length_bits"], int) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return { + "name": self._zarr_v3_name, + "configuration": {"length_bits": self.length * self.item_size_bits}, + } + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=data["configuration"]["length_bits"] // cls.item_size_bits) # type: ignore[arg-type, index, call-overload, operator] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def default_value(self) -> np.str_: + return np.str_("") + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return str(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. 
Expected a string.") + return self.to_dtype().type(data) + + def check_value(self, data: object) -> bool: + return isinstance(data, str | np.str_ | bytes) + + def _cast_value_unsafe(self, value: object) -> np.str_: + return self.to_dtype().type(value) + + +@dataclass(frozen=True, kw_only=True) +class Structured(ZDType[np.dtypes.VoidDType[int], np.void]): + dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] + _zarr_v3_name = "structured" + fields: tuple[tuple[str, ZDType[_BaseDType, _BaseScalar]], ...] + + def default_value(self) -> np.void: + return self._cast_value_unsafe(0) + + def _cast_value_unsafe(self, value: object) -> np.void: + return cast("np.void", np.array([value], dtype=self.to_dtype())[0]) + + @classmethod + def check_dtype(cls, dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: + """ + Check that this dtype is a numpy structured dtype + + Parameters + ---------- + dtype : np.dtypes.DTypeLike + The dtype to check. + + Returns + ------- + TypeGuard[np.dtypes.VoidDType] + True if the dtype matches, False otherwise. + """ + return super().check_dtype(dtype) and dtype.fields is not None + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + from zarr.core.dtype import get_data_type_from_native_dtype + + fields: list[tuple[str, ZDType[_BaseDType, _BaseScalar]]] = [] + + if dtype.fields is None: + raise ValueError("numpy dtype has no fields") + + # fields of a structured numpy dtype are either 2-tuples or 3-tuples. we only + # care about the first element in either case. 
+ for key, (dtype_instance, *_) in dtype.fields.items(): + dtype_wrapped = get_data_type_from_native_dtype(dtype_instance) + fields.append((key, dtype_wrapped)) + + return cls(fields=tuple(fields)) + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + fields = [ + (f_name, f_dtype.to_json(zarr_format=zarr_format)) for f_name, f_dtype in self.fields + ] + if zarr_format == 2: + return fields + elif zarr_format == 3: + base_dict = {"name": self._zarr_v3_name} + base_dict["configuration"] = {"fields": fields} # type: ignore[assignment] + return cast("JSON", base_dict) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def check_json( + cls, data: JSON, zarr_format: ZarrFormat + ) -> TypeGuard[dict[str, JSON] | list[Any]]: + # the actual JSON form is recursive and hard to annotate, so we give up and do + # list[Any] for now + if zarr_format == 2: + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and all( + not isinstance(field, str) and isinstance(field, Sequence) and len(field) == 2 + for field in data + ) + ) + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and "configuration" in data + and isinstance(data["configuration"], dict) + and "fields" in data["configuration"] + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + from zarr.core.dtype import get_data_type_from_json + + if cls.check_json(data, zarr_format=zarr_format): + if zarr_format == 2: + # structured dtypes are constructed directly from a list of lists + return cls( + fields=tuple( # type: ignore[misc] + (f_name, get_data_type_from_json(f_dtype, zarr_format=zarr_format)) + for f_name, f_dtype in data + ) + ) + elif zarr_format == 3: # noqa: SIM102 + if isinstance(data, dict) and "configuration" in data: + config = data["configuration"] + if 
isinstance(config, dict) and "fields" in config: + meta_fields = config["fields"] + fields = tuple( + (f_name, get_data_type_from_json(f_dtype, zarr_format=zarr_format)) + for f_name, f_dtype in meta_fields + ) + return cls(fields=fields) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") + + def to_dtype(self) -> np.dtypes.VoidDType[int]: + return cast( + "np.dtypes.VoidDType[int]", + np.dtype([(key, dtype.to_dtype()) for (key, dtype) in self.fields]), + ) + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return bytes_to_json(self.cast_value(data).tobytes(), zarr_format) + + def check_value(self, data: object) -> bool: + # not sure which values we should accept for structured dtypes. + try: + np.array([data], dtype=self.to_dtype()) + return True # noqa: TRY300 + except ValueError: + return False + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. 
Expected a string.") + as_bytes = bytes_from_json(data, zarr_format=zarr_format) + dtype = self.to_dtype() + return cast("np.void", np.array([as_bytes], dtype=dtype.str).view(dtype)[0]) diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py new file mode 100644 index 0000000000..15ccfb30f1 --- /dev/null +++ b/src/zarr/core/dtype/npy/string.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Self, TypeGuard + +import numpy as np + +from zarr.core.dtype.npy.common import check_json_str +from zarr.core.dtype.wrapper import ZDType + +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + from zarr.core.dtype.wrapper import _BaseDType + +_NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") + + +if _NUMPY_SUPPORTS_VLEN_STRING: + + @dataclass(frozen=True, kw_only=True) + class VariableLengthString(ZDType[np.dtypes.StringDType, str]): # type: ignore[type-var] + dtype_cls = np.dtypes.StringDType + _zarr_v3_name = "numpy.variable_length_utf8" + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + return cls() + + def to_dtype(self) -> np.dtypes.StringDType: + return self.dtype_cls() + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy string dtype. + """ + if zarr_format == 2: + # TODO: take the entire metadata document in here, and + # check the compressors / filters for vlen-utf8 + # Note that we are checking for the object dtype name. + return data == "|O" + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + # Note: unlike many other numpy data types, we don't serialize the .str attribute + # of the data type to JSON. 
This is because Zarr was using `|O` for strings before the + # numpy variable length string data type existed, and we want to be consistent with + # that practice + return "|O" + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() + + def default_value(self) -> str: + return "" + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return str(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. Expected a string.") + return data + + def check_value(self, data: object) -> bool: + return isinstance(data, str) + + def _cast_value_unsafe(self, value: object) -> str: + return str(value) + +else: + # Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead. + @dataclass(frozen=True, kw_only=True) + class VariableLengthString(ZDType[np.dtypes.ObjectDType, str]): # type: ignore[no-redef] + dtype_cls = np.dtypes.ObjectDType + _zarr_v3_name = "numpy.variable_length_utf8" + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + return cls() + + def to_dtype(self) -> np.dtypes.ObjectDType: + return self.dtype_cls() + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy O dtype. 
+ """ + if zarr_format == 2: + # TODO: take the entire metadata document in here, and + # check the compressors / filters for vlen-utf8 + return data == "|O" + elif zarr_format == 3: + return data == cls._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + return cls() + + def default_value(self) -> str: + return "" + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return data # type: ignore[return-value] + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + """ + Strings pass through + """ + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. 
Expected a string.") + return data + + def check_value(self, data: object) -> bool: + return isinstance(data, str) + + def _cast_value_unsafe(self, value: object) -> str: + return str(value) diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py new file mode 100644 index 0000000000..a10b9ae8a3 --- /dev/null +++ b/src/zarr/core/dtype/npy/time.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Self, TypeGuard, cast, get_args + +import numpy as np + +from zarr.core.dtype.common import DataTypeValidationError, HasEndianness +from zarr.core.dtype.npy.common import ( + DateUnit, + EndiannessNumpy, + TimeUnit, + check_json_int, + endianness_from_numpy_str, + endianness_to_numpy_str, +) +from zarr.core.dtype.wrapper import ZDType, _BaseDType + +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + + +def datetime_from_json(data: int, unit: DateUnit | TimeUnit) -> np.datetime64: + """ + Convert a JSON integer to a datetime64. + + Parameters + ---------- + data : int + The JSON integer to convert. + unit : DateUnit or TimeUnit + The unit of the datetime64. + + Returns + ------- + np.datetime64 + The datetime64 value. + """ + return cast("np.datetime64", np.int64(data).view(f"datetime64[{unit}]")) + + +def datetime_to_json(data: np.datetime64) -> int: + """ + Convert a datetime64 to a JSON integer. + + Parameters + ---------- + data : np.datetime64 + The datetime64 value to convert. + + Returns + ------- + int + The JSON representation of the datetime64. 
+ """ + return data.view(np.int64).item() + + +@dataclass(frozen=True, kw_only=True, slots=True) +class DateTime64(ZDType[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): + dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] + _zarr_v3_name = "numpy.datetime64" + unit: DateUnit | TimeUnit + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + unit: DateUnit | TimeUnit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] # type: ignore[assignment] + if unit not in get_args(DateUnit) and unit not in get_args(TimeUnit): + raise DataTypeValidationError('Invalid unit for "numpy.datetime64"') + byteorder = cast("EndiannessNumpy", dtype.byteorder) + return cls(unit=unit, endianness=endianness_from_numpy_str(byteorder)) + + def to_dtype(self) -> np.dtypes.DateTime64DType: + # Numpy does not allow creating datetime64 via + # np.dtypes.DateTime64Dtype() + return cast( + "np.dtypes.DateTime64DType", + np.dtype(f"datetime64[{self.unit}]").newbyteorder( + endianness_to_numpy_str(self.endianness) + ), + ) + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + if zarr_format == 2: + # match M[M], etc + # consider making this a standalone function + return ( + isinstance(data, str) + and len(data) in (6, 7) + and data[0] in (">", "<") + and data[1:4] == "M8[" + and data[4:-1] in get_args(TimeUnit) + get_args(DateUnit) + and data[-1] == "]" + ) + elif zarr_format == 3: + return ( + isinstance(data, dict) + and "name" in data + and data["name"] == cls._zarr_v3_name + and "configuration" in data + and "unit" in data["configuration"] + and data["configuration"]["unit"] in get_args(DateUnit) + get_args(TimeUnit) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def default_value(self) -> np.datetime64: + return np.datetime64("NaT") + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + 
elif zarr_format == 3: + return {"name": self._zarr_v3_name, "configuration": {"unit": self.unit}} + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(unit=data["configuration"]["unit"]) # type: ignore[arg-type, index, call-overload] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: + if check_json_int(data): + return datetime_from_json(data, self.unit) + raise TypeError(f"Invalid type: {data}. Expected an integer.") + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: + return datetime_to_json(data) # type: ignore[arg-type] + + def check_value(self, data: object) -> bool: + # TODO: decide which values we should accept for datetimes. + try: + np.array([data], dtype=self.to_dtype()) + return True # noqa: TRY300 + except ValueError: + return False + + def _cast_value_unsafe(self, value: object) -> np.datetime64: + return self.to_dtype().type(value) # type: ignore[no-any-return, call-overload] diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 23824520f7..aa2837f598 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -291,7 +291,7 @@ def _parse_structured_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any: raise ValueError(f"Fill_value {fill_value} is not valid for dtype {dtype}.") from e -def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any: +def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any: """ Parse a potential fill value into a value that is compatible with the provided dtype. 
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 559298c13f..b82fb54270 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -173,6 +173,7 @@ def __init__( chunk_grid_parsed = ChunkGrid.from_dict(chunk_grid) chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) + # Note: relying on a type method is numpy-specific fill_value_parsed = data_type.to_dtype().type(fill_value) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) diff --git a/tests/conftest.py b/tests/conftest.py index b416e56682..b2f57310e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,7 +21,9 @@ from zarr.core.common import JSON, parse_shapelike from zarr.core.config import config as zarr_config from zarr.core.dtype import data_type_registry, get_data_type_from_native_dtype -from zarr.core.dtype._numpy import DateTime64, HasLength, Structured +from zarr.core.dtype.common import HasLength +from zarr.core.dtype.npy.sized import Structured +from zarr.core.dtype.npy.time import DateTime64 from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync diff --git a/tests/package_with_entrypoint/__init__.py b/tests/package_with_entrypoint/__init__.py index eed2ac43e5..941f7e71c2 100644 --- a/tests/package_with_entrypoint/__init__.py +++ b/tests/package_with_entrypoint/__init__.py @@ -9,7 +9,7 @@ from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import BytesLike -from zarr.core.dtype import Bool +from zarr.core.dtype.npy.bool import Bool class TestEntrypointCodec(ArrayBytesCodec): diff --git a/tests/test_array.py b/tests/test_array.py index ff544ad447..ade63f6e43 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -40,14 +40,14 @@ from zarr.core.chunk_grids import _auto_partition 
from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.dtype import get_data_type_from_native_dtype -from zarr.core.dtype._numpy import ( - DateTime64, - Float64, - Int16, +from zarr.core.dtype.common import Endianness +from zarr.core.dtype.npy.common import endianness_from_numpy_str +from zarr.core.dtype.npy.float import Float64 +from zarr.core.dtype.npy.int import Int16 +from zarr.core.dtype.npy.sized import ( Structured, - endianness_from_numpy_str, ) -from zarr.core.dtype.common import Endianness +from zarr.core.dtype.npy.time import DateTime64 from zarr.core.dtype.wrapper import ZDType from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv diff --git a/tests/test_config.py b/tests/test_config.py index a2a84e7e7e..d897354690 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -24,7 +24,7 @@ from zarr.core.buffer import NDBuffer from zarr.core.codec_pipeline import BatchedCodecPipeline from zarr.core.config import BadConfigError, config -from zarr.core.dtype._numpy import Int8, VariableLengthString +from zarr.core.dtype import Int8, VariableLengthString from zarr.core.indexing import SelectorTuple from zarr.registry import ( fully_qualified_name, diff --git a/tests/test_dtype.py b/tests/test_dtype.py index 122949664c..2b520383b1 100644 --- a/tests/test_dtype.py +++ b/tests/test_dtype.py @@ -7,6 +7,12 @@ import zarr from zarr.core.config import config +from zarr.core.dtype.npy.bool import Bool +from zarr.core.dtype.npy.complex import Complex64, Complex128 +from zarr.core.dtype.npy.float import Float16, Float32, Float64 +from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 +from zarr.core.dtype.npy.sized import FixedLengthAscii, FixedLengthBytes, FixedLengthUnicode +from zarr.core.dtype.npy.time import DateTime64 from .conftest import zdtype_examples @@ -26,28 +32,10 @@ data_type_registry, get_data_type_from_json, ) -from zarr.core.dtype._numpy import ( - 
Bool, - Complex64, - Complex128, - DateTime64, - FixedLengthAscii, - FixedLengthBytes, - FixedLengthUnicode, - Float16, - Float32, - Float64, - Int8, - Int16, - Int32, - Int64, +from zarr.core.dtype.common import DataTypeValidationError +from zarr.core.dtype.npy.sized import ( Structured, - UInt8, - UInt16, - UInt32, - UInt64, ) -from zarr.core.dtype.common import DataTypeValidationError from zarr.core.dtype.registry import DataTypeRegistry diff --git a/tests/test_info.py b/tests/test_info.py index 2e465b6a21..339a0ad419 100644 --- a/tests/test_info.py +++ b/tests/test_info.py @@ -5,7 +5,7 @@ from zarr.codecs.bytes import BytesCodec from zarr.core._info import ArrayInfo, GroupInfo, human_readable_size from zarr.core.common import ZarrFormat -from zarr.core.dtype._numpy import Int32 +from zarr.core.dtype.npy.int import Int32 ZARR_FORMATS = [2, 3] diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index 2eec9a6c74..45913830c3 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -9,7 +9,8 @@ import zarr.storage from zarr.core.buffer import cpu from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.dtype._numpy import Float32, Float64, Int16 +from zarr.core.dtype.npy.float import Float32, Float64 +from zarr.core.dtype.npy.int import Int16 from zarr.core.group import ConsolidatedMetadata, GroupMetadata from zarr.core.metadata import ArrayV2Metadata from zarr.core.metadata.v2 import parse_zarr_format diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index cd30f5cf3f..fa23dccf59 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -12,8 +12,8 @@ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.dtype import get_data_type_from_native_dtype -from zarr.core.dtype._numpy import DateTime64 -from zarr.core.dtype.common import check_json_complex_float +from 
zarr.core.dtype.npy.common import check_json_complex_float +from zarr.core.dtype.npy.time import DateTime64 from zarr.core.group import GroupMetadata, parse_node_type from zarr.core.metadata.v3 import ( ArrayV3Metadata, From afc98725e1d0117e4c45e50f24db2b1e49890adf Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 29 Apr 2025 21:50:15 +0200 Subject: [PATCH 069/130] add timedelta64 --- src/zarr/core/dtype/__init__.py | 4 +- src/zarr/core/dtype/npy/common.py | 3 +- src/zarr/core/dtype/npy/time.py | 239 +++++++++++++++++++++++++----- src/zarr/testing/strategies.py | 4 +- tests/test_array.py | 6 +- 5 files changed, 208 insertions(+), 48 deletions(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 63b593fd28..4cd71bb8bc 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -12,7 +12,7 @@ FixedLengthUnicode, Structured, ) -from zarr.core.dtype.npy.time import DateTime64 +from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 if TYPE_CHECKING: from zarr.core.common import ZarrFormat @@ -43,6 +43,7 @@ "Int32", "Int64", "Structured", + "TimeDelta64", "UInt8", "UInt16", "UInt32", @@ -68,6 +69,7 @@ | FixedLengthBytes | Structured | DateTime64 + | TimeDelta64 ) ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[_BaseDType, _BaseScalar] | dict[str, JSON] diff --git a/src/zarr/core/dtype/npy/common.py b/src/zarr/core/dtype/npy/common.py index 6571002bbb..c079664aa5 100644 --- a/src/zarr/core/dtype/npy/common.py +++ b/src/zarr/core/dtype/npy/common.py @@ -26,8 +26,7 @@ IntLike = SupportsInt | SupportsIndex | bytes | str FloatLike = SupportsIndex | SupportsFloat | bytes | str ComplexLike = SupportsFloat | SupportsIndex | SupportsComplex | bytes | str | None -DateUnit = Literal["Y", "M", "W", "D"] -TimeUnit = Literal["h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] +DateTimeUnit = Literal["Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] EndiannessNumpy = 
Literal[">", "<", "|", "="] TFloatDType_co = TypeVar( diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index a10b9ae8a3..030b01c769 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -1,15 +1,15 @@ from __future__ import annotations +import re from dataclasses import dataclass -from typing import TYPE_CHECKING, Self, TypeGuard, cast, get_args +from typing import TYPE_CHECKING, Literal, Self, TypeGuard, cast, get_args import numpy as np from zarr.core.dtype.common import DataTypeValidationError, HasEndianness from zarr.core.dtype.npy.common import ( - DateUnit, + DateTimeUnit, EndiannessNumpy, - TimeUnit, check_json_int, endianness_from_numpy_str, endianness_to_numpy_str, @@ -19,15 +19,58 @@ if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat +_DTypeName = Literal["datetime64", "timedelta64"] -def datetime_from_json(data: int, unit: DateUnit | TimeUnit) -> np.datetime64: + +def parse_timedtype_name(name: str) -> tuple[_DTypeName, DateTimeUnit | None]: + """ + Parse a string like "datetime64[s]" into a tuple like ("datetime64", "s"). """ - Convert a JSON integer to a datetime64. + dtype_name: _DTypeName + unit: DateTimeUnit | None + + if name.startswith("datetime64"): + dtype_name = "datetime64" + elif name.startswith("timedelta64"): + dtype_name = "timedelta64" + else: + msg = ( + f"Invalid dtype name. Expected a string starting with on of {get_args(_DTypeName)}. " + f"Got {name!r} instead." + ) + raise ValueError(msg) + + regex = re.search(r"\[(.*?)\]", name) + + if regex is None: + if dtype_name == "timedelta64": + unit = None + else: + msg = ( + "The name of a datetime64 dtype must end with a specification of a unit. " + 'For example, "datetime64[s].' + f"Got {name!r}, which does not follow this pattern." + ) + raise ValueError(msg) + else: + maybe_unit = regex.group(1) + unit_expected = get_args(DateTimeUnit) + if maybe_unit not in unit_expected: + msg = f"Invalid unit. 
Expected one of {unit_expected}. Got {maybe_unit} instead." + raise ValueError(msg) + unit = maybe_unit # type: ignore[assignment] + + return dtype_name, unit + + +def datetime_from_int(data: int, unit: DateTimeUnit) -> np.datetime64: + """ + Convert an integer to a datetime64. Parameters ---------- data : int - The JSON integer to convert. + The integer to convert. unit : DateUnit or TimeUnit The unit of the datetime64. @@ -39,33 +82,150 @@ def datetime_from_json(data: int, unit: DateUnit | TimeUnit) -> np.datetime64: return cast("np.datetime64", np.int64(data).view(f"datetime64[{unit}]")) -def datetime_to_json(data: np.datetime64) -> int: +def datetimelike_to_int(data: np.datetime64 | np.timedelta64) -> int: """ - Convert a datetime64 to a JSON integer. + Convert a datetime64 or a timedelta64 to an integer. Parameters ---------- - data : np.datetime64 - The datetime64 value to convert. + data : np.datetime64 | np.timedelta64 + The value to convert. Returns ------- int - The JSON representation of the datetime64. + An integer representation of the scalar. """ return data.view(np.int64).item() +def timedelta_from_int(data: int, unit: DateTimeUnit | None) -> np.timedelta64: + """ + Convert an integer to a timedelta64. + + Parameters + ---------- + data : int + The integer to convert. + unit : DateUnit or TimeUnit + The unit of the timedelta64. + + Returns + ------- + np.timedelta64 + The timedelta64 value. + """ + if unit is not None: + dtype_name = f"timedelta64[{unit}]" + else: + dtype_name = "timedelta64" + return cast("np.timedelta64", np.int64(data).view(dtype_name)) + + +@dataclass(frozen=True, kw_only=True, slots=True) +class TimeDelta64(ZDType[np.dtypes.TimeDelta64DType, np.timedelta64], HasEndianness): + """ + A wrapper for the ``TimeDelta64`` data type defined in numpy. + Scalars of this type can be created by performing arithmetic with ``DateTime64`` scalars. 
+ Like ``DateTime64``, ``TimeDelta64`` is parametrized by a unit, but unlike ``DateTime64``, the + unit for ``TimeDelta64`` is optional. + """ + + dtype_cls = np.dtypes.TimeDelta64DType # type: ignore[assignment] + _zarr_v3_name = "numpy.timedelta64" + unit: DateTimeUnit | None = None + + @classmethod + def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + _, unit = parse_timedtype_name(dtype.name) + byteorder = cast("EndiannessNumpy", dtype.byteorder) + return cls(unit=unit, endianness=endianness_from_numpy_str(byteorder)) + + def to_dtype(self) -> np.dtypes.TimeDelta64DType: + # Numpy does not allow creating timedelta64 via + # np.dtypes.TimeDelta64DType() + if self.unit is not None: + dtype_string = f"timedelta64[{self.unit}]" + else: + dtype_string = "timedelta64" + dt = np.dtype(dtype_string).newbyteorder(endianness_to_numpy_str(self.endianness)) + return cast("np.dtypes.TimeDelta64DType", dt) + + def default_value(self) -> np.timedelta64: + return np.timedelta64("NaT") + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return {"name": self._zarr_v3_name, "configuration": {"unit": self.unit}} + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(unit=data["configuration"]["unit"]) # type: ignore[arg-type, index, call-overload] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: + if check_json_int(data): + return timedelta_from_int(data, self.unit) + raise TypeError(f"Invalid type: {data}. 
Expected an integer.") + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: + return datetimelike_to_int(data) # type: ignore[arg-type] + + def check_value(self, data: object) -> bool: + # TODO: decide which values we should accept for datetimes. + try: + np.array([data], dtype=self.to_dtype()) + return True # noqa: TRY300 + except ValueError: + return False + + def _cast_value_unsafe(self, value: object) -> np.timedelta64: + return self.to_dtype().type(value) # type: ignore[arg-type] + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + if zarr_format == 2: + # match m[M], etc + # consider making this a standalone function + if not (isinstance(data, str) and data[0] in (">", "<") and data[1:3] == "m8"): + return False + if len(data) == 3: + # no unit, and + # we already checked that this string is either m8 + return True + if len(data) in (6, 7): + return data[4:-1] in get_args(DateTimeUnit) and data[-1] == "]" + else: + return False + elif zarr_format == 3: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and set(data.keys()) == {"name", "configuration"} + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) in ({"unit"}, {}) + and data["configuration"].get("unit", None) in (*get_args(DateTimeUnit), None) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @dataclass(frozen=True, kw_only=True, slots=True) class DateTime64(ZDType[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] _zarr_v3_name = "numpy.datetime64" - unit: DateUnit | TimeUnit + unit: DateTimeUnit @classmethod def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - unit: DateUnit | TimeUnit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] # type: ignore[assignment] - if unit not in 
get_args(DateUnit) and unit not in get_args(TimeUnit): + unit: DateTimeUnit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] # type: ignore[assignment] + if unit not in get_args(DateTimeUnit): raise DataTypeValidationError('Invalid unit for "numpy.datetime64"') byteorder = cast("EndiannessNumpy", dtype.byteorder) return cls(unit=unit, endianness=endianness_from_numpy_str(byteorder)) @@ -80,30 +240,6 @@ def to_dtype(self) -> np.dtypes.DateTime64DType: ), ) - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - if zarr_format == 2: - # match M[M], etc - # consider making this a standalone function - return ( - isinstance(data, str) - and len(data) in (6, 7) - and data[0] in (">", "<") - and data[1:4] == "M8[" - and data[4:-1] in get_args(TimeUnit) + get_args(DateUnit) - and data[-1] == "]" - ) - elif zarr_format == 3: - return ( - isinstance(data, dict) - and "name" in data - and data["name"] == cls._zarr_v3_name - and "configuration" in data - and "unit" in data["configuration"] - and data["configuration"]["unit"] in get_args(DateUnit) + get_args(TimeUnit) - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.datetime64: return np.datetime64("NaT") @@ -124,11 +260,11 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: if check_json_int(data): - return datetime_from_json(data, self.unit) + return datetime_from_int(data, self.unit) raise TypeError(f"Invalid type: {data}. Expected an integer.") def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: - return datetime_to_json(data) # type: ignore[arg-type] + return datetimelike_to_int(data) # type: ignore[arg-type] def check_value(self, data: object) -> bool: # TODO: decide which values we should accept for datetimes. 
@@ -140,3 +276,26 @@ def check_value(self, data: object) -> bool: def _cast_value_unsafe(self, value: object) -> np.datetime64: return self.to_dtype().type(value) # type: ignore[no-any-return, call-overload] + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + if zarr_format == 2: + # match M[M], etc + # consider making this a standalone function + return ( + isinstance(data, str) + and len(data) in (6, 7) + and data[0] in (">", "<") + and data[1:4] == "M8[" + and data[4:-1] in get_args(DateTimeUnit) + and data[-1] == "]" + ) + elif zarr_format == 3: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and set(data["configuration"].keys()) == {"unit"} + and data["configuration"]["unit"] in get_args(DateTimeUnit) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index f371a88e83..af4ab831ec 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -55,7 +55,7 @@ def v3_dtypes() -> st.SearchStrategy[np.dtype]: ) -def v2_dtypes() -> st.SearchStrategy[np.dtype]: +def v2_dtypes() -> st.SearchStrategy[np.dtype[Any]]: return ( npst.boolean_dtypes() | npst.integer_dtypes(endianness="=") @@ -65,7 +65,7 @@ def v2_dtypes() -> st.SearchStrategy[np.dtype]: | npst.byte_string_dtypes(endianness="=") | npst.unicode_string_dtypes(endianness="=") | npst.datetime64_dtypes(endianness="=") - # | npst.timedelta64_dtypes() + | npst.timedelta64_dtypes(endianness="?") ) diff --git a/tests/test_array.py b/tests/test_array.py index ade63f6e43..aa6dfd0f07 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -47,7 +47,7 @@ from zarr.core.dtype.npy.sized import ( Structured, ) -from zarr.core.dtype.npy.time import DateTime64 +from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 from zarr.core.dtype.wrapper import 
ZDType from zarr.core.group import AsyncGroup from zarr.core.indexing import BasicIndexer, ceildiv @@ -969,7 +969,7 @@ def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None: Test that the fill value of an array is set to the default value for the dtype object """ a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype) - if isinstance(dtype, DateTime64) and np.isnat(a.fill_value): + if isinstance(dtype, DateTime64 | TimeDelta64) and np.isnat(a.fill_value): assert np.isnat(dtype.default_value()) else: assert a.fill_value == dtype.default_value() @@ -1350,7 +1350,7 @@ def test_default_endianness( """ dtype = Int16(endianness=endianness) arr = zarr.create_array(store=store, shape=(1,), dtype=dtype, zarr_format=zarr_format) - assert endianness_from_numpy_str(arr[:].dtype.byteorder) == endianness + assert endianness_from_numpy_str(arr[:].dtype.byteorder) == endianness # type: ignore[union-attr] @pytest.mark.parametrize("value", [1, 1.4, "a", b"a", np.array(1)]) From e1bf90135d0fad1feba88b9b360bae102b681976 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 30 Apr 2025 22:20:40 +0200 Subject: [PATCH 070/130] refactor time dtypes --- src/zarr/core/buffer/core.py | 11 +- src/zarr/core/dtype/__init__.py | 1 + src/zarr/core/dtype/npy/common.py | 4 +- src/zarr/core/dtype/npy/time.py | 285 +++++++++++++----------------- src/zarr/testing/strategies.py | 4 +- tests/conftest.py | 14 +- tests/test_properties.py | 9 +- 7 files changed, 139 insertions(+), 189 deletions(-) diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 7be9dc8bf4..d50c50cc79 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -427,16 +427,7 @@ def as_scalar(self) -> ScalarType: """Returns the buffer as a scalar value""" if self._data.size != 1: raise ValueError("Buffer does not contain a single scalar value") - item = self.as_numpy_array().item() - scalar: ScalarType - - if np.issubdtype(self.dtype, np.datetime64): - 
unit: str = np.datetime_data(self.dtype)[0] # Extract the unit (e.g., 'Y', 'D', etc.) - scalar = np.datetime64(item, unit) - else: - scalar = self.dtype.type(item) # Regular conversion for non-datetime types - - return scalar + return cast(ScalarType, self.as_numpy_array()[()]) @property def dtype(self) -> np.dtype[Any]: diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 4cd71bb8bc..f535f62f35 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -44,6 +44,7 @@ "Int64", "Structured", "TimeDelta64", + "TimeDelta64", "UInt8", "UInt16", "UInt32", diff --git a/src/zarr/core/dtype/npy/common.py b/src/zarr/core/dtype/npy/common.py index c079664aa5..857c515c19 100644 --- a/src/zarr/core/dtype/npy/common.py +++ b/src/zarr/core/dtype/npy/common.py @@ -26,7 +26,9 @@ IntLike = SupportsInt | SupportsIndex | bytes | str FloatLike = SupportsIndex | SupportsFloat | bytes | str ComplexLike = SupportsFloat | SupportsIndex | SupportsComplex | bytes | str | None -DateTimeUnit = Literal["Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as"] +DateTimeUnit = Literal[ + "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", "generic" +] EndiannessNumpy = Literal[">", "<", "|", "="] TFloatDType_co = TypeVar( diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index 030b01c769..056836a105 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -1,12 +1,23 @@ from __future__ import annotations -import re +from collections.abc import Mapping from dataclasses import dataclass -from typing import TYPE_CHECKING, Literal, Self, TypeGuard, cast, get_args +from typing import ( + TYPE_CHECKING, + ClassVar, + Generic, + Literal, + Self, + TypedDict, + TypeGuard, + TypeVar, + cast, + get_args, +) import numpy as np -from zarr.core.dtype.common import DataTypeValidationError, HasEndianness +from zarr.core.dtype.common import 
HasEndianness from zarr.core.dtype.npy.common import ( DateTimeUnit, EndiannessNumpy, @@ -22,48 +33,7 @@ _DTypeName = Literal["datetime64", "timedelta64"] -def parse_timedtype_name(name: str) -> tuple[_DTypeName, DateTimeUnit | None]: - """ - Parse a string like "datetime64[s]" into a tuple like ("datetime64", "s"). - """ - dtype_name: _DTypeName - unit: DateTimeUnit | None - - if name.startswith("datetime64"): - dtype_name = "datetime64" - elif name.startswith("timedelta64"): - dtype_name = "timedelta64" - else: - msg = ( - f"Invalid dtype name. Expected a string starting with on of {get_args(_DTypeName)}. " - f"Got {name!r} instead." - ) - raise ValueError(msg) - - regex = re.search(r"\[(.*?)\]", name) - - if regex is None: - if dtype_name == "timedelta64": - unit = None - else: - msg = ( - "The name of a datetime64 dtype must end with a specification of a unit. " - 'For example, "datetime64[s].' - f"Got {name!r}, which does not follow this pattern." - ) - raise ValueError(msg) - else: - maybe_unit = regex.group(1) - unit_expected = get_args(DateTimeUnit) - if maybe_unit not in unit_expected: - msg = f"Invalid unit. Expected one of {unit_expected}. Got {maybe_unit} instead." - raise ValueError(msg) - unit = maybe_unit # type: ignore[assignment] - - return dtype_name, unit - - -def datetime_from_int(data: int, unit: DateTimeUnit) -> np.datetime64: +def datetime_from_int(data: int, *, unit: DateTimeUnit, interval: int) -> np.datetime64: """ Convert an integer to a datetime64. @@ -71,15 +41,18 @@ def datetime_from_int(data: int, unit: DateTimeUnit) -> np.datetime64: ---------- data : int The integer to convert. - unit : DateUnit or TimeUnit + unit : DateTimeUnit The unit of the datetime64. + interval : int + The interval of the datetime64. Returns ------- np.datetime64 The datetime64 value. 
""" - return cast("np.datetime64", np.int64(data).view(f"datetime64[{unit}]")) + dtype_name = f"datetime64[{interval}{unit}]" + return cast("np.datetime64", np.int64(data).view(dtype_name)) def datetimelike_to_int(data: np.datetime64 | np.timedelta64) -> int: @@ -99,80 +72,74 @@ def datetimelike_to_int(data: np.datetime64 | np.timedelta64) -> int: return data.view(np.int64).item() -def timedelta_from_int(data: int, unit: DateTimeUnit | None) -> np.timedelta64: - """ - Convert an integer to a timedelta64. +_BaseTimeDType_co = TypeVar( + "_BaseTimeDType_co", + bound=np.dtypes.TimeDelta64DType | np.dtypes.DateTime64DType, + covariant=True, +) +_BaseTimeScalar = TypeVar("_BaseTimeScalar", bound=np.timedelta64 | np.datetime64) - Parameters - ---------- - data : int - The integer to convert. - unit : DateUnit or TimeUnit - The unit of the timedelta64. +TName = TypeVar("TName", bound=str) +TConfig = TypeVar("TConfig", bound=Mapping[str, object]) - Returns - ------- - np.timedelta64 - The timedelta64 value. - """ - if unit is not None: - dtype_name = f"timedelta64[{unit}]" - else: - dtype_name = "timedelta64" - return cast("np.timedelta64", np.int64(data).view(dtype_name)) +class NamedConfig(TypedDict, Generic[TName, TConfig]): + name: TName + configuration: TConfig -@dataclass(frozen=True, kw_only=True, slots=True) -class TimeDelta64(ZDType[np.dtypes.TimeDelta64DType, np.timedelta64], HasEndianness): - """ - A wrapper for the ``TimeDelta64`` data type defined in numpy. - Scalars of this type can be created by performing arithmetic with ``DateTime64`` scalars. - Like ``DateTime64``, ``TimeDelta64`` is parametrized by a unit, but unlike ``DateTime64``, the - unit for ``TimeDelta64`` is optional. 
- """ - dtype_cls = np.dtypes.TimeDelta64DType # type: ignore[assignment] - _zarr_v3_name = "numpy.timedelta64" - unit: DateTimeUnit | None = None +class TimeConfig(TypedDict): + unit: DateTimeUnit + interval: int + + +# aspirational +DateTime64MetaParams = NamedConfig[Literal["numpy.datetime64"], TimeConfig] +TimeDelta64MetaParams = NamedConfig[Literal["numpy.timedelta64"], TimeConfig] + + +@dataclass(frozen=True, kw_only=True, slots=True) +class TimeDTypeBase(ZDType[_BaseTimeDType_co, _BaseTimeScalar], HasEndianness): + _zarr_v2_names: ClassVar[tuple[str, ...]] + # this attribute exists so that we can programmatically create a numpy dtype instance + # because the particular numpy dtype we are wrapping does not allow direct construction via + # cls.dtype_cls() + _numpy_name: ClassVar[_DTypeName] + interval: int + unit: DateTimeUnit @classmethod def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - _, unit = parse_timedtype_name(dtype.name) + unit, interval = np.datetime_data(dtype.name) byteorder = cast("EndiannessNumpy", dtype.byteorder) - return cls(unit=unit, endianness=endianness_from_numpy_str(byteorder)) - - def to_dtype(self) -> np.dtypes.TimeDelta64DType: - # Numpy does not allow creating timedelta64 via - # np.dtypes.TimeDelta64DType() - if self.unit is not None: - dtype_string = f"timedelta64[{self.unit}]" - else: - dtype_string = "timedelta64" - dt = np.dtype(dtype_string).newbyteorder(endianness_to_numpy_str(self.endianness)) - return cast("np.dtypes.TimeDelta64DType", dt) + return cls(unit=unit, interval=interval, endianness=endianness_from_numpy_str(byteorder)) # type: ignore[arg-type] - def default_value(self) -> np.timedelta64: - return np.timedelta64("NaT") - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return {"name": self._zarr_v3_name, "configuration": {"unit": self.unit}} - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no 
cover + def to_dtype(self) -> _BaseTimeDType_co: + # Numpy does not allow creating datetime64 or timedelta64 via + # np.dtypes.{dtype_name}() + # so we use np.dtype with a formatted string. + dtype_string = f"{self._numpy_name}[{self.interval}{self.unit}]" + return np.dtype(dtype_string).newbyteorder(endianness_to_numpy_str(self.endianness)) # type: ignore[return-value] @classmethod def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: - return cls(unit=data["configuration"]["unit"]) # type: ignore[arg-type, index, call-overload] + unit = data["configuration"]["unit"] # type: ignore[index, call-overload] + interval = data["configuration"]["interval"] # type: ignore[index, call-overload] + return cls(unit=unit, interval=interval) # type: ignore[arg-type] raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: - if check_json_int(data): - return timedelta_from_int(data, self.unit) - raise TypeError(f"Invalid type: {data}. Expected an integer.") + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return cast("str", self.to_dtype().str) + elif zarr_format == 3: + return { + "name": self._zarr_v3_name, + "configuration": {"unit": self.unit, "interval": self.interval}, + } + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: return datetimelike_to_int(data) # type: ignore[arg-type] @@ -185,6 +152,31 @@ def check_value(self, data: object) -> bool: except ValueError: return False + +@dataclass(frozen=True, kw_only=True, slots=True) +class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], HasEndianness): + """ + A wrapper for the ``TimeDelta64`` data type defined in numpy. 
+ Scalars of this type can be created by performing arithmetic with ``DateTime64`` scalars. + Like ``DateTime64``, ``TimeDelta64`` is parametrized by a unit, but unlike ``DateTime64``, the + unit for ``TimeDelta64`` is optional. + """ + + dtype_cls = np.dtypes.TimeDelta64DType + _zarr_v3_name = "numpy.timedelta64" + _zarr_v2_names = (">m8", " np.timedelta64: + return np.timedelta64("NaT") + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: + if check_json_int(data): + return self.to_dtype().type(data, f"{self.interval}{self.unit}") + raise TypeError(f"Invalid type: {data}. Expected an integer.") + def _cast_value_unsafe(self, value: object) -> np.timedelta64: return self.to_dtype().type(value) # type: ignore[arg-type] @@ -193,16 +185,16 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: if zarr_format == 2: # match m[M], etc # consider making this a standalone function - if not (isinstance(data, str) and data[0] in (">", "<") and data[1:3] == "m8"): + if not isinstance(data, str): + return False + if not data.startswith(cls._zarr_v2_names): return False if len(data) == 3: # no unit, and # we already checked that this string is either m8 return True - if len(data) in (6, 7): - return data[4:-1] in get_args(DateTimeUnit) and data[-1] == "]" else: - return False + return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" elif zarr_format == 3: return ( isinstance(data, dict) @@ -210,70 +202,29 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: and data["name"] == cls._zarr_v3_name and set(data.keys()) == {"name", "configuration"} and isinstance(data["configuration"], dict) - and set(data["configuration"].keys()) in ({"unit"}, {}) - and data["configuration"].get("unit", None) in (*get_args(DateTimeUnit), None) + and set(data["configuration"].keys()) == {"unit", "interval"} + and data["configuration"]["unit"] in get_args(DateTimeUnit) ) raise 
ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @dataclass(frozen=True, kw_only=True, slots=True) -class DateTime64(ZDType[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): - dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] +class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): + dtype_cls = np.dtypes.DateTime64DType _zarr_v3_name = "numpy.datetime64" - unit: DateTimeUnit - - @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: - unit: DateTimeUnit = dtype.name[dtype.name.rfind("[") + 1 : dtype.name.rfind("]")] # type: ignore[assignment] - if unit not in get_args(DateTimeUnit): - raise DataTypeValidationError('Invalid unit for "numpy.datetime64"') - byteorder = cast("EndiannessNumpy", dtype.byteorder) - return cls(unit=unit, endianness=endianness_from_numpy_str(byteorder)) - - def to_dtype(self) -> np.dtypes.DateTime64DType: - # Numpy does not allow creating datetime64 via - # np.dtypes.DateTime64Dtype() - return cast( - "np.dtypes.DateTime64DType", - np.dtype(f"datetime64[{self.unit}]").newbyteorder( - endianness_to_numpy_str(self.endianness) - ), - ) + _zarr_v2_names = (">M8", " np.datetime64: return np.datetime64("NaT") - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return {"name": self._zarr_v3_name, "configuration": {"unit": self.unit}} - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(unit=data["configuration"]["unit"]) # type: ignore[arg-type, index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def from_json_value(self, data: JSON, *, zarr_format: 
ZarrFormat) -> np.datetime64: if check_json_int(data): - return datetime_from_int(data, self.unit) + return self.to_dtype().type(data, f"{self.interval}{self.unit}") raise TypeError(f"Invalid type: {data}. Expected an integer.") - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: - return datetimelike_to_int(data) # type: ignore[arg-type] - - def check_value(self, data: object) -> bool: - # TODO: decide which values we should accept for datetimes. - try: - np.array([data], dtype=self.to_dtype()) - return True # noqa: TRY300 - except ValueError: - return False - def _cast_value_unsafe(self, value: object) -> np.datetime64: return self.to_dtype().type(value) # type: ignore[no-any-return, call-overload] @@ -282,20 +233,22 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: if zarr_format == 2: # match M[M], etc # consider making this a standalone function - return ( - isinstance(data, str) - and len(data) in (6, 7) - and data[0] in (">", "<") - and data[1:4] == "M8[" - and data[4:-1] in get_args(DateTimeUnit) - and data[-1] == "]" - ) + if not isinstance(data, str): + return False + if not data.startswith(cls._zarr_v2_names): + return False + if len(data) == 3: + # no unit, and + # we already checked that this string is either M8 + return True + else: + return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" elif zarr_format == 3: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name - and set(data["configuration"].keys()) == {"unit"} + and set(data["configuration"].keys()) == {"unit", "interval"} and data["configuration"]["unit"] in get_args(DateTimeUnit) ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index af4ab831ec..4184112f5e 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -50,8 
+50,8 @@ def v3_dtypes() -> st.SearchStrategy[np.dtype]: | npst.complex_number_dtypes(endianness="=") # | npst.byte_string_dtypes(endianness="=") # | npst.unicode_string_dtypes() - # | npst.datetime64_dtypes() - # | npst.timedelta64_dtypes() + | npst.datetime64_dtypes() + | npst.timedelta64_dtypes() ) diff --git a/tests/conftest.py b/tests/conftest.py index b2f57310e3..434763a4f3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,10 +20,14 @@ from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition from zarr.core.common import JSON, parse_shapelike from zarr.core.config import config as zarr_config -from zarr.core.dtype import data_type_registry, get_data_type_from_native_dtype +from zarr.core.dtype import ( + DateTime64, + Structured, + TimeDelta64, + data_type_registry, + get_data_type_from_native_dtype, +) from zarr.core.dtype.common import HasLength -from zarr.core.dtype.npy.sized import Structured -from zarr.core.dtype.npy.time import DateTime64 from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync @@ -426,7 +430,7 @@ def meta_from_array( zdtype_examples += (wrapper_cls.from_dtype(np.dtype([("a", np.float64), ("b", np.int8)])),) elif issubclass(wrapper_cls, HasLength): zdtype_examples += (wrapper_cls(length=1),) - elif issubclass(wrapper_cls, DateTime64): - zdtype_examples += (wrapper_cls(unit="s"),) + elif issubclass(wrapper_cls, DateTime64 | TimeDelta64): + zdtype_examples += (wrapper_cls(unit="s", interval=10),) else: zdtype_examples += (wrapper_cls(),) diff --git a/tests/test_properties.py b/tests/test_properties.py index 7c741ec873..15dd701582 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -1,4 +1,3 @@ -import dataclasses import json import numbers from typing import Any @@ -209,8 +208,8 @@ def test_roundtrip_array_metadata_from_json(data: st.DataObject, zarr_format: in zarray_dict = 
json.loads(buffer_dict[ZARR_JSON].to_bytes().decode()) metadata_roundtripped = ArrayV3Metadata.from_dict(zarray_dict) - orig = dataclasses.asdict(metadata) - rt = dataclasses.asdict(metadata_roundtripped) + orig = metadata.to_dict() + rt = metadata_roundtripped.to_dict() assert deep_equal(orig, rt), f"Roundtrip mismatch:\nOriginal: {orig}\nRoundtripped: {rt}" @@ -323,5 +322,5 @@ def test_array_metadata_meets_spec(meta: ArrayV2Metadata | ArrayV3Metadata) -> N elif dtype_native.kind == "c": # fill_value should be a two-element array [real, imag]. assert serialized_complex_float_is_valid(asdict_dict["fill_value"]) - elif dtype_native.kind == "M" and np.isnat(meta.fill_value): - assert asdict_dict["fill_value"] == "NaT" + elif dtype_native.kind in ("M", "m") and np.isnat(meta.fill_value): + assert asdict_dict["fill_value"] == -9223372036854775808 From 890077ef1d3fb61d08ff9dc8bc31c8e0a66ccbd4 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 1 May 2025 11:12:35 +0200 Subject: [PATCH 071/130] widen dtype test strategies --- src/zarr/testing/strategies.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 4184112f5e..066239ff33 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -41,17 +41,17 @@ def paths(draw: st.DrawFn, *, max_num_nodes: int | None = None) -> Any: return draw(st.just("/") | keys(max_num_nodes=max_num_nodes)) -def v3_dtypes() -> st.SearchStrategy[np.dtype]: +def v3_dtypes() -> st.SearchStrategy[np.dtype[Any]]: return ( npst.boolean_dtypes() | npst.integer_dtypes(endianness="=") | npst.unsigned_integer_dtypes(endianness="=") | npst.floating_dtypes(endianness="=") | npst.complex_number_dtypes(endianness="=") - # | npst.byte_string_dtypes(endianness="=") - # | npst.unicode_string_dtypes() - | npst.datetime64_dtypes() - | npst.timedelta64_dtypes() + | npst.byte_string_dtypes(endianness="=") + | 
npst.unicode_string_dtypes(endianness="=") + | npst.datetime64_dtypes(endianness="=") + | npst.timedelta64_dtypes(endianness="=") ) @@ -65,7 +65,7 @@ def v2_dtypes() -> st.SearchStrategy[np.dtype[Any]]: | npst.byte_string_dtypes(endianness="=") | npst.unicode_string_dtypes(endianness="=") | npst.datetime64_dtypes(endianness="=") - | npst.timedelta64_dtypes(endianness="?") + | npst.timedelta64_dtypes(endianness="=") ) From a3f05f09559b2d411e826534ad7427c1b73bf92c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 2 May 2025 11:13:04 +0200 Subject: [PATCH 072/130] modify structured dtype fill value rt to avoid to_dict --- tests/test_metadata/test_v2.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index dc8cd49feb..aa8cfc4a31 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -10,7 +10,6 @@ import zarr.storage from zarr.core.buffer import cpu from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.dtype.npy.common import bytes_to_json from zarr.core.dtype.npy.float import Float32, Float64 from zarr.core.dtype.npy.int import Int16 from zarr.core.group import ConsolidatedMetadata, GroupMetadata @@ -337,10 +336,5 @@ def test_structured_dtype_fill_value_serialization(tmp_path, fill_value): zarr.consolidate_metadata(root_group.store, zarr_format=zarr_format) root_group = zarr.open_group(group_path, mode="r") - observed = root_group.metadata.consolidated_metadata.to_dict()["metadata"]["structured_dtype"][ - "fill_value" - ] - if fill_value is None: - assert observed is None - else: - assert observed == bytes_to_json(fill_value, zarr_format=zarr_format) + observed = root_group.metadata.consolidated_metadata.metadata["structured_dtype"].fill_value + assert observed == fill_value From 4788f05242c342d07dea4b2761c912881ff4cc06 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 2 May 2025 16:48:49 +0200 Subject: 
[PATCH 073/130] wip: begin creating isomorphic test suite for dtypes --- src/zarr/abc/codec.py | 6 +- src/zarr/codecs/sharding.py | 4 +- src/zarr/codecs/transpose.py | 4 +- src/zarr/core/_info.py | 4 +- src/zarr/core/array.py | 24 +- src/zarr/core/array_spec.py | 6 +- src/zarr/core/codec_pipeline.py | 4 +- src/zarr/core/dtype/__init__.py | 12 +- src/zarr/core/dtype/common.py | 3 +- src/zarr/core/dtype/npy/bool.py | 4 +- src/zarr/core/dtype/npy/common.py | 36 +-- src/zarr/core/dtype/npy/complex.py | 4 +- src/zarr/core/dtype/npy/float.py | 4 +- src/zarr/core/dtype/npy/int.py | 20 +- src/zarr/core/dtype/npy/sized.py | 18 +- src/zarr/core/dtype/npy/string.py | 6 +- src/zarr/core/dtype/npy/time.py | 4 +- src/zarr/core/dtype/registry.py | 12 +- src/zarr/core/dtype/wrapper.py | 14 +- src/zarr/core/metadata/v2.py | 4 +- src/zarr/core/metadata/v3.py | 10 +- tests/package_with_entrypoint/__init__.py | 4 +- tests/test_dtype/__init__.py | 0 tests/{ => test_dtype}/test_dtype.py | 185 +++------------ tests/test_dtype/test_npy/test_common.py | 277 ++++++++++++++++++++++ tests/test_dtype/test_npy/test_int.py | 0 tests/test_dtype_registry.py | 158 ++++++++++++ 27 files changed, 570 insertions(+), 257 deletions(-) create mode 100644 tests/test_dtype/__init__.py rename tests/{ => test_dtype}/test_dtype.py (58%) create mode 100644 tests/test_dtype/test_npy/test_common.py create mode 100644 tests/test_dtype/test_npy/test_int.py create mode 100644 tests/test_dtype_registry.py diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py index 31cb44d84e..d9e3520d42 100644 --- a/src/zarr/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -15,7 +15,7 @@ from zarr.abc.store import ByteGetter, ByteSetter from zarr.core.array_spec import ArraySpec from zarr.core.chunk_grids import ChunkGrid - from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType from zarr.core.indexing import SelectorTuple __all__ = [ @@ -96,7 +96,7 @@ 
def validate( self, *, shape: ChunkCoords, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid, ) -> None: """Validates that the codec configuration is compatible with the array metadata. @@ -291,7 +291,7 @@ def supports_partial_encode(self) -> bool: ... @abstractmethod def validate( - self, *, shape: ChunkCoords, dtype: ZDType[_BaseDType, _BaseScalar], chunk_grid: ChunkGrid + self, *, shape: ChunkCoords, dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid ) -> None: """Validates that all codec configurations are compatible with the array metadata. Raises errors when a codec configuration is not compatible. diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 12d709b599..882a956451 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -59,7 +59,7 @@ from typing import Self from zarr.core.common import JSON - from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType MAX_UINT_64 = 2**64 - 1 ShardMapping = Mapping[ChunkCoords, Buffer] @@ -409,7 +409,7 @@ def validate( self, *, shape: ChunkCoords, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid, ) -> None: if len(self.chunk_shape) != len(shape): diff --git a/src/zarr/codecs/transpose.py b/src/zarr/codecs/transpose.py index 0e49e3db10..b0ba7888c1 100644 --- a/src/zarr/codecs/transpose.py +++ b/src/zarr/codecs/transpose.py @@ -16,7 +16,7 @@ from zarr.core.buffer import NDBuffer from zarr.core.chunk_grids import ChunkGrid - from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType def parse_transpose_order(data: JSON | Iterable[int]) -> tuple[int, ...]: @@ -49,7 +49,7 @@ def to_dict(self) -> dict[str, JSON]: def validate( self, shape: tuple[int, ...], - dtype: ZDType[_BaseDType, _BaseScalar], + 
dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid, ) -> None: if len(self.order) != len(shape): diff --git a/src/zarr/core/_info.py b/src/zarr/core/_info.py index 525b80c65f..e6d30413b4 100644 --- a/src/zarr/core/_info.py +++ b/src/zarr/core/_info.py @@ -9,7 +9,7 @@ from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec from zarr.core.common import ZarrFormat - from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @dataclasses.dataclass(kw_only=True) @@ -80,7 +80,7 @@ class ArrayInfo: _type: Literal["Array"] = "Array" _zarr_format: ZarrFormat - _data_type: ZDType[_BaseDType, _BaseScalar] + _data_type: ZDType[TBaseDType, TBaseScalar] _shape: tuple[int, ...] _shard_shape: tuple[int, ...] | None = None _chunk_shape: tuple[int, ...] | None = None diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 4f97b049ff..20af68cab7 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -129,7 +129,7 @@ from zarr.abc.codec import CodecPipeline from zarr.codecs.sharding import ShardingCodecIndexLocation - from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar from zarr.core.group import AsyncGroup from zarr.storage import StoreLike @@ -556,7 +556,7 @@ async def _create( *, # v2 and v3 shape: ShapeLike, - dtype: ZDTypeLike | ZDType[_BaseDType, _BaseScalar], + dtype: ZDTypeLike | ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, @@ -675,7 +675,7 @@ async def _create( @staticmethod def _create_metadata_v3( shape: ShapeLike, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], chunk_shape: ChunkCoords, fill_value: Any | None = None, chunk_key_encoding: ChunkKeyEncodingLike | None = None, @@ -726,7 +726,7 @@ async def _create_v3( store_path: StorePath, *, shape: 
ShapeLike, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], chunk_shape: ChunkCoords, config: ArrayConfig, fill_value: Any | None = None, @@ -774,7 +774,7 @@ async def _create_v3( @staticmethod def _create_metadata_v2( shape: ChunkCoords, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], chunks: ChunkCoords, order: MemoryOrder, dimension_separator: Literal[".", "/"] | None = None, @@ -804,7 +804,7 @@ async def _create_v2( store_path: StorePath, *, shape: ChunkCoords, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], chunks: ChunkCoords, order: MemoryOrder, config: ArrayConfig, @@ -1037,7 +1037,7 @@ def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec ) @property - def _zdtype(self) -> ZDType[_BaseDType, _BaseScalar]: + def _zdtype(self) -> ZDType[TBaseDType, TBaseScalar]: """ The zarr-specific representation of the array data type """ @@ -1047,7 +1047,7 @@ def _zdtype(self) -> ZDType[_BaseDType, _BaseScalar]: return self.metadata.data_type @property - def dtype(self) -> _BaseDType: + def dtype(self) -> TBaseDType: """Returns the data type of the array. Returns @@ -4599,7 +4599,7 @@ def _parse_chunk_key_encoding( def _get_default_chunk_encoding_v3( - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: """ Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype. @@ -4619,7 +4619,7 @@ def _get_default_chunk_encoding_v3( def _get_default_chunk_encoding_v2( - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], ) -> tuple[tuple[numcodecs.abc.Codec, ...] 
| None, numcodecs.abc.Codec | None]: """ Get the default chunk encoding for Zarr format 2 arrays, given a dtype @@ -4637,7 +4637,7 @@ def _parse_chunk_encoding_v2( *, compressor: CompressorsLike, filters: FiltersLike, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], ) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]: """ Generate chunk encoding classes for Zarr format 2 arrays with optional defaults. @@ -4681,7 +4681,7 @@ def _parse_chunk_encoding_v3( compressors: CompressorsLike, filters: FiltersLike, serializer: SerializerLike, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], ) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]: """ Generate chunk encoding classes for v3 arrays with optional defaults. diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index e8e451944f..279bf6edf0 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -17,7 +17,7 @@ from zarr.core.buffer import BufferPrototype from zarr.core.common import ChunkCoords - from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType class ArrayConfigParams(TypedDict): @@ -89,7 +89,7 @@ def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig: @dataclass(frozen=True) class ArraySpec: shape: ChunkCoords - dtype: ZDType[_BaseDType, _BaseScalar] + dtype: ZDType[TBaseDType, TBaseScalar] fill_value: Any config: ArrayConfig prototype: BufferPrototype @@ -97,7 +97,7 @@ class ArraySpec: def __init__( self, shape: ChunkCoords, - dtype: ZDType[_BaseDType, _BaseScalar], + dtype: ZDType[TBaseDType, TBaseScalar], fill_value: Any, config: ArrayConfig, prototype: BufferPrototype, diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index 71600fee90..3d00fe5467 100644 --- a/src/zarr/core/codec_pipeline.py +++ 
b/src/zarr/core/codec_pipeline.py @@ -27,7 +27,7 @@ from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, BufferPrototype, NDBuffer from zarr.core.chunk_grids import ChunkGrid - from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType T = TypeVar("T") U = TypeVar("U") @@ -133,7 +133,7 @@ def __iter__(self) -> Iterator[Codec]: yield from self.bytes_bytes_codecs def validate( - self, *, shape: ChunkCoords, dtype: ZDType[_BaseDType, _BaseScalar], chunk_grid: ChunkGrid + self, *, shape: ChunkCoords, dtype: ZDType[TBaseDType, TBaseScalar], chunk_grid: ChunkGrid ) -> None: for codec in self: codec.validate(shape=shape, dtype=dtype, chunk_grid=chunk_grid) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index f535f62f35..1a18849a13 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, TypeAlias, get_args +from zarr.core.dtype.common import DataTypeValidationError from zarr.core.dtype.npy.bool import Bool from zarr.core.dtype.npy.complex import Complex64, Complex128 from zarr.core.dtype.npy.float import Float16, Float32, Float64 @@ -26,11 +27,12 @@ VariableLengthString, ) from zarr.core.dtype.registry import DataTypeRegistry -from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar +from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType __all__ = [ "Complex64", "Complex128", + "DataTypeValidationError", "DateTime64", "FixedLengthAscii", "FixedLengthBytes", @@ -73,14 +75,14 @@ | TimeDelta64 ) -ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[_BaseDType, _BaseScalar] | dict[str, JSON] +ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | dict[str, JSON] for dtype in get_args(DTYPE): data_type_registry.register(dtype._zarr_v3_name, dtype) # TODO: find a better name for this function -def 
get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[_BaseDType, _BaseScalar]: +def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[TBaseDType, TBaseScalar]: """ Get a data type wrapper (an instance of ``ZDType``) from a native data type, e.g. a numpy dtype. """ @@ -106,11 +108,11 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[_BaseDType, def get_data_type_from_json( dtype: JSON, zarr_format: ZarrFormat -) -> ZDType[_BaseDType, _BaseScalar]: +) -> ZDType[TBaseDType, TBaseScalar]: return data_type_registry.match_json(dtype, zarr_format=zarr_format) -def parse_data_type(dtype: ZDTypeLike, zarr_format: ZarrFormat) -> ZDType[_BaseDType, _BaseScalar]: +def parse_data_type(dtype: ZDTypeLike, zarr_format: ZarrFormat) -> ZDType[TBaseDType, TBaseScalar]: """ Interpret the input as a ZDType instance. """ diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 657f56bfb7..4249c57b1f 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -4,7 +4,8 @@ from typing import Literal Endianness = Literal["little", "big"] -JSONFloat = float | Literal["NaN", "Infinity", "-Infinity"] +SpecialFloats = Literal["NaN", "Infinity", "-Infinity"] +JSONFloat = float | SpecialFloats class DataTypeValidationError(ValueError): ... 
diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index 293d8383c0..776acf4f8c 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -5,7 +5,7 @@ from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype.npy.common import check_json_bool -from zarr.core.dtype.wrapper import ZDType, _BaseDType +from zarr.core.dtype.wrapper import TBaseDType, ZDType @dataclass(frozen=True, kw_only=True, slots=True) @@ -26,7 +26,7 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_]): dtype_cls = np.dtypes.BoolDType @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() def to_dtype(self: Self) -> np.dtypes.BoolDType: diff --git a/src/zarr/core/dtype/npy/common.py b/src/zarr/core/dtype/npy/common.py index 857c515c19..8ef1286e6f 100644 --- a/src/zarr/core/dtype/npy/common.py +++ b/src/zarr/core/dtype/npy/common.py @@ -77,7 +77,7 @@ def endianness_from_numpy_str(endianness: EndiannessNumpy) -> Endianness | None: # for dtypes without byte ordering semantics return None raise ValueError( - f"Invalid endianness: {endianness}. Expected one of {get_args(EndiannessNumpy)}" + f"Invalid endianness: {endianness!r}. Expected one of {get_args(EndiannessNumpy)}" ) @@ -108,7 +108,7 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: case None: return "|" raise ValueError( - f"Invalid endianness: {endianness}. Expected one of {get_args(Endianness)} or None" + f"Invalid endianness: {endianness!r}. Expected one of {get_args(Endianness)} or None" ) @@ -155,7 +155,7 @@ def float_from_json_v3(data: JSONFloat) -> float: return float_from_json_v2(data) -def float_from_json(data: JSONFloat, zarr_format: ZarrFormat) -> float: +def float_from_json(data: JSONFloat, *, zarr_format: ZarrFormat) -> float: """ Convert a JSON float to a float based on zarr format. 
@@ -177,7 +177,7 @@ def float_from_json(data: JSONFloat, zarr_format: ZarrFormat) -> float: return float_from_json_v3(data) -def bytes_from_json(data: str, zarr_format: ZarrFormat) -> bytes: +def bytes_from_json(data: str, *, zarr_format: ZarrFormat) -> bytes: """ Convert a JSON string to bytes @@ -198,7 +198,7 @@ def bytes_from_json(data: str, zarr_format: ZarrFormat) -> bytes: # TODO: differentiate these as needed. This is a spec question. if zarr_format == 3: return base64.b64decode(data.encode("ascii")) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") # pragma: no cover def bytes_to_json(data: bytes, zarr_format: ZarrFormat) -> str: @@ -261,9 +261,11 @@ def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: return float_to_json_v2(data) -def complex_to_json_v3(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: +def complex_float_to_json_v3( + data: complex | np.complexfloating[Any, Any], +) -> tuple[JSONFloat, JSONFloat]: """ - Convert a complex number to JSON (v3). + Convert a complex number to JSON as defined by the Zarr V3 spec. Parameters ---------- @@ -278,13 +280,15 @@ def complex_to_json_v3(data: complex | np.complexfloating[Any, Any]) -> tuple[JS return float_to_json_v3(data.real), float_to_json_v3(data.imag) -def complex_to_json_v2(data: complex | np.complexfloating[Any, Any]) -> tuple[JSONFloat, JSONFloat]: +def complex_float_to_json_v2( + data: complex | np.complexfloating[Any, Any], +) -> tuple[JSONFloat, JSONFloat]: """ - Convert a complex number to JSON (v2). + Convert a complex number to JSON as defined by the Zarr V2 spec. Parameters ---------- - data : complex or np.complexfloating + data : complex | np.complexfloating The complex value to convert. 
Returns @@ -296,14 +300,14 @@ def complex_to_json_v2(data: complex | np.complexfloating[Any, Any]) -> tuple[JS def complex_float_to_json( - data: complex | np.complexfloating[Any, Any], zarr_format: ZarrFormat + data: complex | np.complexfloating[Any, Any], *, zarr_format: ZarrFormat ) -> tuple[JSONFloat, JSONFloat]: """ Convert a complex number to JSON, parametrized by the zarr format version. Parameters ---------- - data : complex or np.complexfloating + data : complex | np.complexfloating The complex value to convert. zarr_format : ZarrFormat The zarr format version. @@ -314,19 +318,19 @@ def complex_float_to_json( The JSON representation of the complex number. """ if zarr_format == 2: - return complex_to_json_v2(data) + return complex_float_to_json_v2(data) else: - return complex_to_json_v3(data) + return complex_float_to_json_v3(data) raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") -def float_to_json(data: float | np.floating[Any], zarr_format: ZarrFormat) -> JSONFloat: +def float_to_json(data: float | np.floating[Any], *, zarr_format: ZarrFormat) -> JSONFloat: """ Convert a float to JSON, parametrized by the zarr format version. Parameters ---------- - data : float or np.floating + data : float | np.floating The float value to convert. zarr_format : ZarrFormat The zarr format version. 
diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index 22e1bd66a3..6e19266660 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -21,7 +21,7 @@ endianness_from_numpy_str, endianness_to_numpy_str, ) -from zarr.core.dtype.wrapper import ZDType, _BaseDType +from zarr.core.dtype.wrapper import TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.dtype.npy.common import EndiannessNumpy @@ -33,7 +33,7 @@ class BaseComplex(ZDType[TComplexDType_co, TComplexScalar_co], HasEndianness): _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index 3f56919cf4..15baaaadaa 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -16,7 +16,7 @@ float_from_json, float_to_json, ) -from zarr.core.dtype.wrapper import ZDType, _BaseDType +from zarr.core.dtype.wrapper import TBaseDType, ZDType @dataclass(frozen=True) @@ -25,7 +25,7 @@ class BaseFloat(ZDType[TFloatDType_co, TFloatScalar_co], HasEndianness): _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index 500f98bb73..7da7245162 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -11,7 +11,7 @@ endianness_from_numpy_str, endianness_to_numpy_str, ) -from zarr.core.dtype.wrapper import ZDType, _BaseDType +from zarr.core.dtype.wrapper import TBaseDType, ZDType _NumpyIntDType = ( 
np.dtypes.Int8DType @@ -132,7 +132,7 @@ class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|i1",) @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() def to_dtype(self: Self) -> np.dtypes.Int8DType: @@ -150,7 +150,7 @@ class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|u1",) @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() def to_dtype(self: Self) -> np.dtypes.UInt8DType: @@ -168,7 +168,7 @@ class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i2", " Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -193,7 +193,7 @@ class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u2", " Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -217,7 +217,7 @@ class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", " Self: + def from_dtype(cls: type[Self], dtype: TBaseDType) -> Self: # We override the base implementation to address a windows-specific, pre-numpy 2 issue where # ``np.dtype('i')`` is an instance of ``np.dtypes.IntDType`` that acts like `int32` instead of ``np.dtype('int32')`` # In this case, ``type(np.dtype('i')) == np.dtypes.Int32DType`` will evaluate to ``True``, @@ -229,7 +229,7 @@ def from_dtype(cls: type[Self], dtype: _BaseDType) -> Self: return super().from_dtype(dtype) @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + 
def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -253,7 +253,7 @@ class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u4", " Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -277,7 +277,7 @@ class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i8", " Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -301,7 +301,7 @@ class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u8", " Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py index 8d8ff57800..d9524a4891 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/sized.py @@ -16,7 +16,7 @@ endianness_from_numpy_str, endianness_to_numpy_str, ) -from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar +from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @dataclass(frozen=True, kw_only=True) @@ -26,7 +26,7 @@ class FixedLengthAscii(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength): item_size_bits: ClassVar[int] = 8 @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) def to_dtype(self) -> np.dtypes.BytesDType[int]: @@ -98,7 +98,7 @@ class 
FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength): item_size_bits: ClassVar[int] = 8 @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) def to_dtype(self) -> np.dtypes.VoidDType[int]: @@ -136,7 +136,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[Any]]: + def check_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[Any]]: """ Numpy void dtype comes in two forms: * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. @@ -181,7 +181,7 @@ class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls( length=dtype.itemsize // (cls.item_size_bits // 8), @@ -252,7 +252,7 @@ def _cast_value_unsafe(self, value: object) -> np.str_: class Structured(ZDType[np.dtypes.VoidDType[int], np.void]): dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] _zarr_v3_name = "structured" - fields: tuple[tuple[str, ZDType[_BaseDType, _BaseScalar]], ...] + fields: tuple[tuple[str, ZDType[TBaseDType, TBaseScalar]], ...] 
def default_value(self) -> np.void: return self._cast_value_unsafe(0) @@ -261,7 +261,7 @@ def _cast_value_unsafe(self, value: object) -> np.void: return cast("np.void", np.array([value], dtype=self.to_dtype())[0]) @classmethod - def check_dtype(cls, dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: + def check_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: """ Check that this dtype is a numpy structured dtype @@ -278,10 +278,10 @@ def check_dtype(cls, dtype: _BaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: return super().check_dtype(dtype) and dtype.fields is not None @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: from zarr.core.dtype import get_data_type_from_native_dtype - fields: list[tuple[str, ZDType[_BaseDType, _BaseScalar]]] = [] + fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] if dtype.fields is None: raise ValueError("numpy dtype has no fields") diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index 15ccfb30f1..3849fd05ce 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat - from zarr.core.dtype.wrapper import _BaseDType + from zarr.core.dtype.wrapper import TBaseDType _NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") @@ -23,7 +23,7 @@ class VariableLengthString(ZDType[np.dtypes.StringDType, str]): # type: ignore[ _zarr_v3_name = "numpy.variable_length_utf8" @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() def to_dtype(self) -> np.dtypes.StringDType: @@ -83,7 +83,7 @@ class VariableLengthString(ZDType[np.dtypes.ObjectDType, str]): # type: ignore[ _zarr_v3_name = "numpy.variable_length_utf8" @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def 
_from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() def to_dtype(self) -> np.dtypes.ObjectDType: diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index 056836a105..f691bd88c8 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -25,7 +25,7 @@ endianness_from_numpy_str, endianness_to_numpy_str, ) -from zarr.core.dtype.wrapper import ZDType, _BaseDType +from zarr.core.dtype.wrapper import TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @@ -109,7 +109,7 @@ class TimeDTypeBase(ZDType[_BaseTimeDType_co, _BaseTimeScalar], HasEndianness): unit: DateTimeUnit @classmethod - def _from_dtype_unsafe(cls, dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: unit, interval = np.datetime_data(dtype.name) byteorder = cast("EndiannessNumpy", dtype.byteorder) return cls(unit=unit, interval=interval, endianness=endianness_from_numpy_str(byteorder)) # type: ignore[arg-type] diff --git a/src/zarr/core/dtype/registry.py b/src/zarr/core/dtype/registry.py index 4ad2158f96..ae5c3d426e 100644 --- a/src/zarr/core/dtype/registry.py +++ b/src/zarr/core/dtype/registry.py @@ -9,7 +9,7 @@ from importlib.metadata import EntryPoint from zarr.core.common import JSON, ZarrFormat - from zarr.core.dtype.wrapper import ZDType, _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType # This class is different from the other registry classes, which inherit from @@ -17,7 +17,7 @@ # have just 1 registry class in use. 
@dataclass(frozen=True, kw_only=True) class DataTypeRegistry: - contents: dict[str, type[ZDType[_BaseDType, _BaseScalar]]] = field( + contents: dict[str, type[ZDType[TBaseDType, TBaseScalar]]] = field( default_factory=dict, init=False ) lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) @@ -28,15 +28,15 @@ def lazy_load(self) -> None: self.lazy_load_list.clear() - def register(self: Self, key: str, cls: type[ZDType[_BaseDType, _BaseScalar]]) -> None: + def register(self: Self, key: str, cls: type[ZDType[TBaseDType, TBaseScalar]]) -> None: # don't register the same dtype twice if key not in self.contents or self.contents[key] != cls: self.contents[key] = cls - def get(self, key: str) -> type[ZDType[_BaseDType, _BaseScalar]]: + def get(self, key: str) -> type[ZDType[TBaseDType, TBaseScalar]]: return self.contents[key] - def match_dtype(self, dtype: _BaseDType) -> ZDType[_BaseDType, _BaseScalar]: + def match_dtype(self, dtype: TBaseDType) -> ZDType[TBaseDType, TBaseScalar]: self.lazy_load() for val in self.contents.values(): try: @@ -45,7 +45,7 @@ def match_dtype(self, dtype: _BaseDType) -> ZDType[_BaseDType, _BaseScalar]: pass raise ValueError(f"No data type wrapper found that matches dtype '{dtype}'") - def match_json(self, data: JSON, zarr_format: ZarrFormat) -> ZDType[_BaseDType, _BaseScalar]: + def match_json(self, data: JSON, zarr_format: ZarrFormat) -> ZDType[TBaseDType, TBaseScalar]: self.lazy_load() for val in self.contents.values(): try: diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index ba1b78f096..be51db3ae5 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -35,15 +35,15 @@ # This the upper bound for the scalar types we support. It's numpy scalars + str, # because the new variable-length string dtype in numpy does not have a corresponding scalar type -_BaseScalar = np.generic | str +TBaseScalar = np.generic | str # This is the bound for the dtypes that we support. 
If we support non-numpy dtypes, # then this bound will need to be widened. -_BaseDType = np.dtype[np.generic] +TBaseDType = np.dtype[np.generic] # These two type parameters are covariant because we want # x : ZDType[BaseDType, BaseScalar] = ZDType[SubDType, SubScalar] # to type check -TScalar_co = TypeVar("TScalar_co", bound=_BaseScalar, covariant=True) -TDType_co = TypeVar("TDType_co", bound=_BaseDType, covariant=True) +TScalar_co = TypeVar("TScalar_co", bound=TBaseScalar, covariant=True) +TDType_co = TypeVar("TDType_co", bound=TBaseDType, covariant=True) @dataclass(frozen=True, kw_only=True, slots=True) @@ -69,7 +69,7 @@ class ZDType(Generic[TDType_co, TScalar_co], ABC): _zarr_v3_name: ClassVar[str] @classmethod - def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[TDType_co]: + def check_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_co]: """ Check that a data type matches the dtype_cls class attribute. Used as a type guard. @@ -86,7 +86,7 @@ def check_dtype(cls: type[Self], dtype: _BaseDType) -> TypeGuard[TDType_co]: return type(dtype) is cls.dtype_cls @classmethod - def from_dtype(cls: type[Self], dtype: _BaseDType) -> Self: + def from_dtype(cls: type[Self], dtype: TBaseDType) -> Self: """ Wrap a dtype object. @@ -113,7 +113,7 @@ def from_dtype(cls: type[Self], dtype: _BaseDType) -> Self: @classmethod @abstractmethod - def _from_dtype_unsafe(cls: type[Self], dtype: _BaseDType) -> Self: + def _from_dtype_unsafe(cls: type[Self], dtype: TBaseDType) -> Self: """ Wrap a native dtype without checking. 
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index aa2837f598..ab3da36cfe 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -9,7 +9,7 @@ from zarr.abc.metadata import Metadata from zarr.core.dtype import get_data_type_from_native_dtype -from zarr.core.dtype.wrapper import TDType_co, TScalar_co, ZDType, _BaseDType, _BaseScalar +from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, TDType_co, TScalar_co, ZDType if TYPE_CHECKING: from typing import Literal, Self @@ -45,7 +45,7 @@ class ArrayV2MetadataDict(TypedDict): class ArrayV2Metadata(Metadata): shape: ChunkCoords chunks: ChunkCoords - dtype: ZDType[_BaseDType, _BaseScalar] + dtype: ZDType[TBaseDType, TBaseScalar] fill_value: int | float | str | bytes | None = 0 order: MemoryOrder = "C" filters: tuple[numcodecs.abc.Codec, ...] | None = None diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index b82fb54270..fe8ced1d3f 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -16,7 +16,7 @@ from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.chunk_grids import ChunkGrid from zarr.core.common import JSON, ChunkCoords - from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar import json @@ -82,7 +82,7 @@ def validate_array_bytes_codec(codecs: tuple[Codec, ...]) -> ArrayBytesCodec: return abcs[0] -def validate_codecs(codecs: tuple[Codec, ...], dtype: ZDType[_BaseDType, _BaseScalar]) -> None: +def validate_codecs(codecs: tuple[Codec, ...], dtype: ZDType[TBaseDType, TBaseScalar]) -> None: """Check that the codecs are valid for the given dtype""" from zarr.codecs.sharding import ShardingCodec @@ -141,7 +141,7 @@ class ArrayV3MetadataDict(TypedDict): @dataclass(frozen=True, kw_only=True) class ArrayV3Metadata(Metadata): shape: ChunkCoords - data_type: ZDType[_BaseDType, _BaseScalar] + data_type: ZDType[TBaseDType, TBaseScalar] 
chunk_grid: ChunkGrid chunk_key_encoding: ChunkKeyEncoding fill_value: Any @@ -156,7 +156,7 @@ def __init__( self, *, shape: Iterable[int], - data_type: ZDType[_BaseDType, _BaseScalar], + data_type: ZDType[TBaseDType, TBaseScalar], chunk_grid: dict[str, JSON] | ChunkGrid, chunk_key_encoding: ChunkKeyEncodingLike, fill_value: object, @@ -222,7 +222,7 @@ def ndim(self) -> int: return len(self.shape) @property - def dtype(self) -> ZDType[_BaseDType, _BaseScalar]: + def dtype(self) -> ZDType[TBaseDType, TBaseScalar]: return self.data_type @property diff --git a/tests/package_with_entrypoint/__init__.py b/tests/package_with_entrypoint/__init__.py index 941f7e71c2..3b46740c35 100644 --- a/tests/package_with_entrypoint/__init__.py +++ b/tests/package_with_entrypoint/__init__.py @@ -8,7 +8,7 @@ from zarr.codecs import BytesCodec from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, NDBuffer -from zarr.core.common import BytesLike +from zarr.core.common import BytesLike, ZarrFormat from zarr.core.dtype.npy.bool import Bool @@ -81,5 +81,5 @@ def from_json(cls, data: Any, zarr_format: Literal[2, 3]) -> Self: return cls() raise ValueError - def to_json(self, zarr_format): + def to_json(self, zarr_format: ZarrFormat) -> str: return self._zarr_v3_name diff --git a/tests/test_dtype/__init__.py b/tests/test_dtype/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_dtype.py b/tests/test_dtype/test_dtype.py similarity index 58% rename from tests/test_dtype.py rename to tests/test_dtype/test_dtype.py index 2b520383b1..566a04b5fb 100644 --- a/tests/test_dtype.py +++ b/tests/test_dtype/test_dtype.py @@ -1,48 +1,42 @@ from __future__ import annotations -import os -import re -import sys from typing import TYPE_CHECKING, Any, get_args -import zarr -from zarr.core.config import config -from zarr.core.dtype.npy.bool import Bool -from zarr.core.dtype.npy.complex import Complex64, Complex128 -from zarr.core.dtype.npy.float import 
Float16, Float32, Float64 -from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 -from zarr.core.dtype.npy.sized import FixedLengthAscii, FixedLengthBytes, FixedLengthUnicode -from zarr.core.dtype.npy.time import DateTime64 +from zarr.core.dtype import ( + DTYPE, + Bool, + Complex64, + Complex128, + DateTime64, + FixedLengthAscii, + FixedLengthBytes, + FixedLengthUnicode, + Float16, + Float32, + Float64, + Int8, + Int16, + Int32, + Int64, + Structured, + UInt8, + UInt16, + UInt32, + UInt64, + VariableLengthString, + ZDType, +) from .conftest import zdtype_examples if TYPE_CHECKING: - from collections.abc import Generator - from zarr.core.common import ZarrFormat - from zarr.core.dtype.wrapper import _BaseDType, _BaseScalar + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar import numpy as np import pytest -from zarr.core.dtype import ( - DTYPE, - VariableLengthString, - ZDType, - data_type_registry, - get_data_type_from_json, -) from zarr.core.dtype.common import DataTypeValidationError -from zarr.core.dtype.npy.sized import ( - Structured, -) -from zarr.core.dtype.registry import DataTypeRegistry - - -@pytest.fixture -def data_type_registry_fixture() -> DataTypeRegistry: - return DataTypeRegistry() - _NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") VLEN_STRING_DTYPE: np.dtypes.StringDType | np.dtypes.ObjectDType @@ -177,7 +171,7 @@ def test_default_value(wrapper: ZDType[Any, Any], expected_default: Any) -> None ], ) def test_to_json_value_v2( - wrapper: ZDType[_BaseDType, _BaseScalar], input_value: Any, expected_json: Any + wrapper: ZDType[TBaseDType, TBaseScalar], input_value: Any, expected_json: Any ) -> None: """ Test the to_json_value method for each dtype wrapper for zarr v2 @@ -213,7 +207,7 @@ def test_to_json_value_v2( ], ) def test_to_json_value_v3( - wrapper: ZDType[_BaseDType, _BaseScalar], input_value: Any, expected_json: Any + wrapper: ZDType[TBaseDType, TBaseScalar], input_value: Any, 
expected_json: Any ) -> None: """ Test the to_json_value method for each dtype wrapper for zarr v3 @@ -246,132 +240,9 @@ def test_to_json_value_v3( ], ) def test_from_json_value( - wrapper: ZDType[_BaseDType, _BaseScalar], json_value: Any, expected_value: Any + wrapper: ZDType[TBaseDType, TBaseScalar], json_value: Any, expected_value: Any ) -> None: """ Test the from_json_value method for each dtype wrapper. """ assert wrapper.from_json_value(json_value, zarr_format=2) == expected_value - - -class TestRegistry: - @staticmethod - def test_register(data_type_registry_fixture: DataTypeRegistry) -> None: - """ - Test that registering a dtype in a data type registry works. - """ - data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) - assert data_type_registry_fixture.get(Bool._zarr_v3_name) == Bool - assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), Bool) - - @staticmethod - def test_override(data_type_registry_fixture: DataTypeRegistry) -> None: - """ - Test that registering a new dtype with the same name works (overriding the previous one). - """ - data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) - - class NewBool(Bool): - def default_value(self) -> np.bool_: - return np.True_ - - data_type_registry_fixture.register(NewBool._zarr_v3_name, NewBool) - assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), NewBool) - - @staticmethod - @pytest.mark.parametrize( - ("wrapper_cls", "dtype_str"), [(Bool, "bool"), (FixedLengthUnicode, "|U4")] - ) - def test_match_dtype( - data_type_registry_fixture: DataTypeRegistry, - wrapper_cls: type[ZDType[_BaseDType, _BaseScalar]], - dtype_str: str, - ) -> None: - """ - Test that match_dtype resolves a numpy dtype into an instance of the correspond wrapper for that dtype. 
- """ - data_type_registry_fixture.register(wrapper_cls._zarr_v3_name, wrapper_cls) - assert isinstance(data_type_registry_fixture.match_dtype(np.dtype(dtype_str)), wrapper_cls) - - @staticmethod - def test_unregistered_dtype(data_type_registry_fixture: DataTypeRegistry) -> None: - """ - Test that match_dtype raises an error if the dtype is not registered. - """ - outside_dtype = "int8" - with pytest.raises( - ValueError, match=f"No data type wrapper found that matches dtype '{outside_dtype}'" - ): - data_type_registry_fixture.match_dtype(np.dtype(outside_dtype)) - - with pytest.raises(KeyError): - data_type_registry_fixture.get(outside_dtype) - - @staticmethod - @pytest.mark.parametrize("zdtype", zdtype_examples) - def test_registered_dtypes( - zdtype: ZDType[_BaseDType, _BaseScalar], zarr_format: ZarrFormat - ) -> None: - """ - Test that the registered dtypes can be retrieved from the registry. - """ - - assert data_type_registry.match_dtype(zdtype.to_dtype()) == zdtype - assert ( - data_type_registry.match_json( - zdtype.to_json(zarr_format=zarr_format), zarr_format=zarr_format - ) - == zdtype - ) - - @staticmethod - @pytest.mark.parametrize("zdtype", zdtype_examples) - def test_match_dtype_unique( - zdtype: ZDType[Any, Any], - data_type_registry_fixture: DataTypeRegistry, - zarr_format: ZarrFormat, - ) -> None: - """ - Test that the match_dtype method uniquely specifies a registered data type. 
We create a local registry - that excludes the data type class being tested, and ensure that an instance of the wrapped data type - fails to match anything in the registry - """ - for _cls in get_args(DTYPE): - if _cls is not type(zdtype): - data_type_registry_fixture.register(_cls._zarr_v3_name, _cls) - - dtype_instance = zdtype.to_dtype() - - msg = f"No data type wrapper found that matches dtype '{dtype_instance}'" - with pytest.raises(ValueError, match=re.escape(msg)): - data_type_registry_fixture.match_dtype(dtype_instance) - - instance_dict = zdtype.to_json(zarr_format=zarr_format) - msg = f"No data type wrapper found that matches {instance_dict}" - with pytest.raises(ValueError, match=re.escape(msg)): - data_type_registry_fixture.match_json(instance_dict, zarr_format=zarr_format) - - -# this is copied from the registry tests -- we should deduplicate -here = os.path.abspath(os.path.dirname(__file__)) - - -@pytest.fixture -def set_path() -> Generator[None, None, None]: - sys.path.append(here) - zarr.registry._collect_entrypoints() - yield - sys.path.remove(here) - registries = zarr.registry._collect_entrypoints() - for registry in registries: - registry.lazy_load_list.clear() - config.reset() - - -@pytest.mark.usefixtures("set_path") -def test_entrypoint_codec(zarr_format: ZarrFormat) -> None: - from package_with_entrypoint import TestDataType - - instance = TestDataType() - dtype_json = instance.to_json(zarr_format=zarr_format) - assert get_data_type_from_json(dtype_json, zarr_format=zarr_format) == instance diff --git a/tests/test_dtype/test_npy/test_common.py b/tests/test_dtype/test_npy/test_common.py new file mode 100644 index 0000000000..f3082d0c3b --- /dev/null +++ b/tests/test_dtype/test_npy/test_common.py @@ -0,0 +1,277 @@ +from __future__ import annotations + +import base64 +import math +import re +import sys +from typing import TYPE_CHECKING, Any, get_args + +import numpy as np +import pytest + +from zarr.core.dtype.common import Endianness, 
JSONFloat, SpecialFloats +from zarr.core.dtype.npy.common import ( + EndiannessNumpy, + bytes_from_json, + bytes_to_json, + check_json_float, + check_json_float_v2, + check_json_float_v3, + check_json_int, + complex_float_to_json, + complex_float_to_json_v2, + complex_float_to_json_v3, + endianness_from_numpy_str, + endianness_to_numpy_str, + float_from_json, + float_from_json_v2, + float_from_json_v3, + float_to_json_v2, + float_to_json_v3, +) + +if TYPE_CHECKING: + from zarr.core.common import ZarrFormat + + +def nan_equal(a: object, b: object) -> bool: + """ + Convenience function for equality comparison between two values ``a`` and ``b``, that might both + be NaN. Returns True if both ``a`` and ``b`` are NaN, otherwise returns a == b + """ + if math.isnan(a) and math.isnan(b): # type: ignore[arg-type] + return True + return a == b + + +json_float_v2: list[tuple[JSONFloat, float | np.floating[Any]]] = [ + ("Infinity", float("inf")), + ("Infinity", np.inf), + ("-Infinity", float("-inf")), + ("-Infinity", -np.inf), + ("NaN", float("nan")), + ("NaN", np.nan), + (1.0, 1.0), +] + +# exactly the same as v2, for now, until we get support for the special NaN encoding defined in the +# v3 spec +json_float_v3: list[tuple[JSONFloat, float | np.floating[Any]]] = [ + ("Infinity", float("inf")), + ("Infinity", np.inf), + ("-Infinity", float("-inf")), + ("-Infinity", -np.inf), + ("NaN", float("nan")), + ("NaN", np.nan), + (1.0, 1.0), +] + + +@pytest.mark.parametrize( + ("data", "expected"), + [(">", "big"), ("<", "little"), ("=", sys.byteorder), ("|", None), ("err", "")], +) +def test_endianness_from_numpy_str(data: str, expected: str | None) -> None: + """ + Test that endianness_from_numpy_str correctly converts a numpy str literal to a human-readable literal value. 
+ This test also checks that an invalid string input raises a ``ValueError`` + """ + if data in get_args(EndiannessNumpy): + assert endianness_from_numpy_str(data) == expected # type: ignore[arg-type] + else: + msg = f"Invalid endianness: {data!r}. Expected one of {get_args(EndiannessNumpy)}" + with pytest.raises(ValueError, match=re.escape(msg)): + endianness_from_numpy_str(data) # type: ignore[arg-type] + + +@pytest.mark.parametrize( + ("data", "expected"), + [("big", ">"), ("little", "<"), (None, "|"), ("err", "")], +) +def test_endianness_to_numpy_str(data: str | None, expected: str) -> None: + """ + Test that endianness_to_numpy_str correctly converts a human-readable literal value to a numpy str literal. + This test also checks that an invalid string input raises a ``ValueError`` + """ + if data in get_args(Endianness) + (None,): + assert endianness_to_numpy_str(data) == expected # type: ignore[arg-type] + else: + msg = f"Invalid endianness: {data!r}. Expected one of {get_args(Endianness)}" + with pytest.raises(ValueError, match=re.escape(msg)): + endianness_to_numpy_str(data) # type: ignore[arg-type] + + +@pytest.mark.parametrize(("data", "expected"), json_float_v2 + [("SHOULD_ERR", "")]) +def test_float_from_json_v2(data: JSONFloat | str, expected: float | str) -> None: + """ + Test that float_from_json_v2 correctly converts a JSON string representation of a float to a float. 
+ This test also checks that an invalid string input raises a ``ValueError`` + """ + if data in get_args(SpecialFloats) or isinstance(data, float): + assert nan_equal(float_from_json_v2(data), expected) # type: ignore[arg-type] + else: + msg = f"could not convert string to float: {data!r}" + with pytest.raises(ValueError, match=msg): + float_from_json_v2(data) # type: ignore[arg-type] + + +@pytest.mark.parametrize(("data", "expected"), json_float_v3 + [("SHOULD_ERR", "")]) +def test_float_from_json_v3(data: JSONFloat | str, expected: float | str) -> None: + """ + Test that float_from_json_v3 correctly converts a JSON string representation of a float to a float. + This test also checks that an invalid string input raises a ``ValueError`` + """ + if data in get_args(SpecialFloats) or isinstance(data, float): + assert nan_equal(float_from_json_v3(data), expected) # type: ignore[arg-type] + else: + msg = f"could not convert string to float: {data!r}" + with pytest.raises(ValueError, match=msg): + float_from_json_v3(data) # type: ignore[arg-type] + + +@pytest.mark.parametrize(("data", "expected"), json_float_v2) +def test_float_from_json(data: JSONFloat, expected: float | str, zarr_format: ZarrFormat) -> None: + """ + Test that float_from_json_v3 correctly converts a JSON string representation of a float to a float. + This test also checks that an invalid string input raises a ``ValueError`` + """ + observed = float_from_json(data, zarr_format=zarr_format) + if zarr_format == 2: + expected = float_from_json_v2(data) + else: + expected = float_from_json_v3(data) + assert nan_equal(observed, expected) + + +# note the order of parameters relative to the order of the parametrized variable. 
+@pytest.mark.parametrize(("expected", "data"), json_float_v2) +def test_float_to_json_v2(data: float | np.floating[Any], expected: JSONFloat) -> None: + """ + Test that floats are JSON-encoded properly for zarr v2 + """ + observed = float_to_json_v2(data) + assert observed == expected + + +# note the order of parameters relative to the order of the parametrized variable. +@pytest.mark.parametrize(("expected", "data"), json_float_v3) +def test_float_to_json_v3(data: float | np.floating[Any], expected: JSONFloat) -> None: + """ + Test that floats are JSON-encoded properly for zarr v3 + """ + observed = float_to_json_v3(data) + assert observed == expected + + +def test_bytes_from_json(zarr_format: ZarrFormat) -> None: + """ + Test that a string is interpreted as base64-encoded bytes using the ascii alphabet. + This test takes zarr_format as a parameter but doesn't actually do anything with it, because at + present there is no zarr-format-specific logic in the code being tested, but such logic may + exist in the future. + """ + data = "\00" + assert bytes_from_json(data, zarr_format=zarr_format) == base64.b64decode(data.encode("ascii")) + + +def test_bytes_to_json(zarr_format: ZarrFormat) -> None: + """ + Test that bytes are encoded with base64 using the ascii alphabet. + + This test takes zarr_format as a parameter but doesn't actually do anything with it, because at + present there is no zarr-format-specific logic in the code being tested, but such logic may + exist in the future. + """ + + data = b"asdas" + assert bytes_to_json(data, zarr_format=zarr_format) == base64.b64encode(data).decode("ascii") + + +# note the order of parameters relative to the order of the parametrized variable. +@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v2) +def test_complex_to_json_v2(float_data: float | np.floating[Any], json_expected: JSONFloat) -> None: + """ + Test that complex numbers are correctly converted to JSON in v2 format. 
+ + This use the same test input as the float tests, but the conversion is tested + for complex numbers with real and imaginary parts equal to the float + values provided in the test cases. + """ + cplx = complex(float_data, float_data) + cplx_npy = np.complex128(cplx) + assert complex_float_to_json_v2(cplx) == (json_expected, json_expected) + assert complex_float_to_json_v2(cplx_npy) == (json_expected, json_expected) + + +# note the order of parameters relative to the order of the parametrized variable. +@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3) +def test_complex_to_json_v3(float_data: float | np.floating[Any], json_expected: JSONFloat) -> None: + """ + Test that complex numbers are correctly converted to JSON in v3 format. + + This use the same test input as the float tests, but the conversion is tested + for complex numbers with real and imaginary parts equal to the float + values provided in the test cases. + """ + cplx = complex(float_data, float_data) + cplx_npy = np.complex128(cplx) + assert complex_float_to_json_v3(cplx) == (json_expected, json_expected) + assert complex_float_to_json_v3(cplx_npy) == (json_expected, json_expected) + + +@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3) +def test_complex_float_to_json( + float_data: float | np.floating[Any], json_expected: JSONFloat, zarr_format: ZarrFormat +) -> None: + """ + Test that complex numbers are correctly converted to JSON in v2 or v3 formats, depending + on the ``zarr_format`` keyword argument. + + This use the same test input as the float tests, but the conversion is tested + for complex numbers with real and imaginary parts equal to the float + values provided in the test cases. 
+ """ + + cplx = complex(float_data, float_data) + cplx_npy = np.complex128(cplx) + assert complex_float_to_json(cplx, zarr_format=zarr_format) == (json_expected, json_expected) + assert complex_float_to_json(cplx_npy, zarr_format=zarr_format) == ( + json_expected, + json_expected, + ) + + +check_json_float_cases = get_args(SpecialFloats) + (1.0, 2) + + +@pytest.mark.parametrize("data", check_json_float_cases) +def test_check_json_float_v2_valid(data: JSONFloat | int) -> None: + assert check_json_float_v2(data) + + +def test_check_json_float_v2_invalid() -> None: + assert not check_json_float_v2("invalid") + + +@pytest.mark.parametrize("data", check_json_float_cases) +def test_check_json_float_v3_valid(data: JSONFloat | int) -> None: + assert check_json_float_v3(data) + + +def test_check_json_float_v3_invalid() -> None: + assert not check_json_float_v3("invalid") + + +@pytest.mark.parametrize("data", check_json_float_cases) +def test_check_json_float(data: JSONFloat | int, zarr_format: ZarrFormat) -> None: + observed = check_json_float(data, zarr_format=zarr_format) + if zarr_format == 2: + expected = check_json_float_v2(data) + else: + expected = check_json_float_v3(data) + assert observed == expected + + +def test_check_json_int() -> None: + assert check_json_int(0) + assert not check_json_int(1.0) diff --git a/tests/test_dtype/test_npy/test_int.py b/tests/test_dtype/test_npy/test_int.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py new file mode 100644 index 0000000000..5e87945b3a --- /dev/null +++ b/tests/test_dtype_registry.py @@ -0,0 +1,158 @@ +from __future__ import annotations + +import re +import sys +from pathlib import Path +from typing import TYPE_CHECKING, Any, get_args + +import numpy as np +import pytest + +import zarr +from zarr.core.config import config +from zarr.core.dtype import ( + DTYPE, + Bool, + FixedLengthUnicode, + TBaseDType, + TBaseScalar, + ZDType, + 
data_type_registry, + get_data_type_from_json, +) +from zarr.core.dtype.registry import DataTypeRegistry + +from .conftest import zdtype_examples + +if TYPE_CHECKING: + from collections.abc import Generator + + from zarr.core.common import ZarrFormat + + +@pytest.fixture +def data_type_registry_fixture() -> DataTypeRegistry: + return DataTypeRegistry() + + +class TestRegistry: + @staticmethod + def test_register(data_type_registry_fixture: DataTypeRegistry) -> None: + """ + Test that registering a dtype in a data type registry works. + """ + data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) + assert data_type_registry_fixture.get(Bool._zarr_v3_name) == Bool + assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), Bool) + + @staticmethod + def test_override(data_type_registry_fixture: DataTypeRegistry) -> None: + """ + Test that registering a new dtype with the same name works (overriding the previous one). + """ + data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) + + class NewBool(Bool): + def default_value(self) -> np.bool_: + return np.True_ + + data_type_registry_fixture.register(NewBool._zarr_v3_name, NewBool) + assert isinstance(data_type_registry_fixture.match_dtype(np.dtype("bool")), NewBool) + + @staticmethod + @pytest.mark.parametrize( + ("wrapper_cls", "dtype_str"), [(Bool, "bool"), (FixedLengthUnicode, "|U4")] + ) + def test_match_dtype( + data_type_registry_fixture: DataTypeRegistry, + wrapper_cls: type[ZDType[TBaseDType, TBaseScalar]], + dtype_str: str, + ) -> None: + """ + Test that match_dtype resolves a numpy dtype into an instance of the correspond wrapper for that dtype. 
+ """ + data_type_registry_fixture.register(wrapper_cls._zarr_v3_name, wrapper_cls) + assert isinstance(data_type_registry_fixture.match_dtype(np.dtype(dtype_str)), wrapper_cls) + + @staticmethod + def test_unregistered_dtype(data_type_registry_fixture: DataTypeRegistry) -> None: + """ + Test that match_dtype raises an error if the dtype is not registered. + """ + outside_dtype = "int8" + with pytest.raises( + ValueError, match=f"No data type wrapper found that matches dtype '{outside_dtype}'" + ): + data_type_registry_fixture.match_dtype(np.dtype(outside_dtype)) + + with pytest.raises(KeyError): + data_type_registry_fixture.get(outside_dtype) + + @staticmethod + @pytest.mark.parametrize("zdtype", zdtype_examples) + def test_registered_dtypes( + zdtype: ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat + ) -> None: + """ + Test that the registered dtypes can be retrieved from the registry. + """ + + assert data_type_registry.match_dtype(zdtype.to_dtype()) == zdtype + assert ( + data_type_registry.match_json( + zdtype.to_json(zarr_format=zarr_format), zarr_format=zarr_format + ) + == zdtype + ) + + @staticmethod + @pytest.mark.parametrize("zdtype", zdtype_examples) + def test_match_dtype_unique( + zdtype: ZDType[Any, Any], + data_type_registry_fixture: DataTypeRegistry, + zarr_format: ZarrFormat, + ) -> None: + """ + Test that the match_dtype method uniquely specifies a registered data type. 
We create a local registry + that excludes the data type class being tested, and ensure that an instance of the wrapped data type + fails to match anything in the registry + """ + for _cls in get_args(DTYPE): + if _cls is not type(zdtype): + data_type_registry_fixture.register(_cls._zarr_v3_name, _cls) + + dtype_instance = zdtype.to_dtype() + + msg = f"No data type wrapper found that matches dtype '{dtype_instance}'" + with pytest.raises(ValueError, match=re.escape(msg)): + data_type_registry_fixture.match_dtype(dtype_instance) + + instance_dict = zdtype.to_json(zarr_format=zarr_format) + msg = f"No data type wrapper found that matches {instance_dict}" + with pytest.raises(ValueError, match=re.escape(msg)): + data_type_registry_fixture.match_json(instance_dict, zarr_format=zarr_format) + + +# this is copied from the registry tests -- we should deduplicate +here = str(Path(__file__).parent.absolute()) + + +@pytest.fixture +def set_path() -> Generator[None, None, None]: + sys.path.append(here) + zarr.registry._collect_entrypoints() + yield + sys.path.remove(here) + registries = zarr.registry._collect_entrypoints() + for registry in registries: + registry.lazy_load_list.clear() + config.reset() + + +@pytest.mark.usefixtures("set_path") +def test_entrypoint_dtype(zarr_format: ZarrFormat) -> None: + from package_with_entrypoint import TestDataType + + instance = TestDataType() + dtype_json = instance.to_json(zarr_format=zarr_format) + assert get_data_type_from_json(dtype_json, zarr_format=zarr_format) == instance From d3f92043449b3d155318ac6494baa21a6a055064 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 2 May 2025 18:47:18 +0200 Subject: [PATCH 074/130] finish common tests --- src/zarr/core/dtype/npy/common.py | 162 +++++++++++------------ tests/conftest.py | 20 --- tests/test_dtype/confttest.py | 22 +++ tests/test_dtype/test_npy/test_common.py | 87 ++++++++++-- 4 files changed, 179 insertions(+), 112 deletions(-) create mode 100644 
tests/test_dtype/confttest.py diff --git a/src/zarr/core/dtype/npy/common.py b/src/zarr/core/dtype/npy/common.py index 8ef1286e6f..8033e48291 100644 --- a/src/zarr/core/dtype/npy/common.py +++ b/src/zarr/core/dtype/npy/common.py @@ -261,6 +261,29 @@ def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: return float_to_json_v2(data) +def float_to_json(data: float | np.floating[Any], *, zarr_format: ZarrFormat) -> JSONFloat: + """ + Convert a float to JSON, parametrized by the zarr format version. + + Parameters + ---------- + data : float | np.floating + The float value to convert. + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + JSONFloat + The JSON representation of the float. + """ + if zarr_format == 2: + return float_to_json_v2(data) + else: + return float_to_json_v3(data) + raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") + + def complex_float_to_json_v3( data: complex | np.complexfloating[Any, Any], ) -> tuple[JSONFloat, JSONFloat]: @@ -324,26 +347,60 @@ def complex_float_to_json( raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") -def float_to_json(data: float | np.floating[Any], *, zarr_format: ZarrFormat) -> JSONFloat: +def complex_float_from_json_v2(data: tuple[JSONFloat, JSONFloat]) -> complex: """ - Convert a float to JSON, parametrized by the zarr format version. + Convert a JSON complex float to a complex number (v2). Parameters ---------- - data : float | np.floating - The float value to convert. + data : tuple[JSONFloat, JSONFloat] + The JSON complex float to convert. + + Returns + ------- + np.complexfloating + The complex number. + """ + return complex(float_from_json_v2(data[0]), float_from_json_v2(data[1])) + + +def complex_float_from_json_v3(data: tuple[JSONFloat, JSONFloat]) -> complex: + """ + Convert a JSON complex float to a complex number (v3). + + Parameters + ---------- + data : tuple[JSONFloat, JSONFloat] + The JSON complex float to convert. 
+ + Returns + ------- + np.complexfloating + The complex number. + """ + return complex(float_from_json_v3(data[0]), float_from_json_v3(data[1])) + + +def complex_float_from_json(data: tuple[JSONFloat, JSONFloat], zarr_format: ZarrFormat) -> complex: + """ + Convert a JSON complex float to a complex number based on zarr format. + + Parameters + ---------- + data : tuple[JSONFloat, JSONFloat] + The JSON complex float to convert. zarr_format : ZarrFormat The zarr format version. Returns ------- - JSONFloat - The JSON representation of the float. + np.complexfloating + The complex number. """ if zarr_format == 2: - return float_to_json_v2(data) + return complex_float_from_json_v2(data) else: - return float_to_json_v3(data) + return complex_float_from_json_v3(data) raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") @@ -366,9 +423,9 @@ def check_json_float_v2(data: JSON) -> TypeGuard[JSONFloat]: return isinstance(data, float | int) -def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: +def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloat]: """ - Check if a JSON value represents a complex float, as per the behavior of zarr-python 2.x + Check if a JSON value represents a float (v3). Parameters ---------- @@ -378,20 +435,15 @@ def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFl Returns ------- Bool - True if the data is a complex float, False otherwise. + True if the data is a float, False otherwise. """ - return ( - not isinstance(data, str) - and isinstance(data, Sequence) - and len(data) == 2 - and check_json_float_v2(data[0]) - and check_json_float_v2(data[1]) - ) + # TODO: handle the special JSON serialization of different NaN values + return check_json_float_v2(data) -def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloat]: +def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: """ - Check if a JSON value represents a float (v3). 
+ Check if a JSON value represents a complex float, as per the behavior of zarr-python 2.x Parameters ---------- @@ -401,10 +453,15 @@ def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloat]: Returns ------- Bool - True if the data is a float, False otherwise. + True if the data is a complex float, False otherwise. """ - # TODO: handle the special JSON serialization of different NaN values - return check_json_float_v2(data) + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and len(data) == 2 + and check_json_float_v2(data[0]) + and check_json_float_v2(data[1]) + ) def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: @@ -434,7 +491,7 @@ def check_json_complex_float( data: JSON, zarr_format: ZarrFormat ) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: """ - Check if a JSON value represents a complex float based on zarr format. + Check if a JSON value represents a complex float, given a zarr format. Parameters ---------- @@ -524,60 +581,3 @@ def check_json_bool(data: JSON) -> TypeGuard[bool]: True if the data is a boolean, False otherwise. """ return isinstance(data, bool) - - -def complex_float_from_json_v2(data: tuple[JSONFloat, JSONFloat]) -> complex: - """ - Convert a JSON complex float to a complex number (v2). - - Parameters - ---------- - data : tuple[JSONFloat, JSONFloat] - The JSON complex float to convert. - - Returns - ------- - np.complexfloating - The complex number. - """ - return complex(float_from_json_v2(data[0]), float_from_json_v2(data[1])) - - -def complex_float_from_json_v3(data: tuple[JSONFloat, JSONFloat]) -> complex: - """ - Convert a JSON complex float to a complex number (v3). - - Parameters - ---------- - data : tuple[JSONFloat, JSONFloat] - The JSON complex float to convert. - - Returns - ------- - np.complexfloating - The complex number. 
- """ - return complex(float_from_json_v3(data[0]), float_from_json_v3(data[1])) - - -def complex_float_from_json(data: tuple[JSONFloat, JSONFloat], zarr_format: ZarrFormat) -> complex: - """ - Convert a JSON complex float to a complex number based on zarr format. - - Parameters - ---------- - data : tuple[JSONFloat, JSONFloat] - The JSON complex float to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - np.complexfloating - The complex number. - """ - if zarr_format == 2: - return complex_float_from_json_v2(data) - else: - return complex_float_from_json_v3(data) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") diff --git a/tests/conftest.py b/tests/conftest.py index 434763a4f3..7a075cb9ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,13 +21,8 @@ from zarr.core.common import JSON, parse_shapelike from zarr.core.config import config as zarr_config from zarr.core.dtype import ( - DateTime64, - Structured, - TimeDelta64, - data_type_registry, get_data_type_from_native_dtype, ) -from zarr.core.dtype.common import HasLength from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync @@ -43,7 +38,6 @@ from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike, ShardsLike from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike from zarr.core.common import ChunkCoords, MemoryOrder, ShapeLike, ZarrFormat - from zarr.core.dtype.wrapper import ZDType async def parse_store( @@ -420,17 +414,3 @@ def meta_from_array( chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, ) - - -# Generate a collection of zdtype instances for use in testing. -zdtype_examples: tuple[ZDType[Any, Any], ...] 
= () -for wrapper_cls in data_type_registry.contents.values(): - # The Structured dtype has to be constructed with some actual fields - if wrapper_cls is Structured: - zdtype_examples += (wrapper_cls.from_dtype(np.dtype([("a", np.float64), ("b", np.int8)])),) - elif issubclass(wrapper_cls, HasLength): - zdtype_examples += (wrapper_cls(length=1),) - elif issubclass(wrapper_cls, DateTime64 | TimeDelta64): - zdtype_examples += (wrapper_cls(unit="s", interval=10),) - else: - zdtype_examples += (wrapper_cls(),) diff --git a/tests/test_dtype/confttest.py b/tests/test_dtype/confttest.py new file mode 100644 index 0000000000..aba08a08c5 --- /dev/null +++ b/tests/test_dtype/confttest.py @@ -0,0 +1,22 @@ +# Generate a collection of zdtype instances for use in testing. +from typing import Any + +import numpy as np + +from zarr.core.dtype import data_type_registry +from zarr.core.dtype.common import HasLength +from zarr.core.dtype.npy.sized import Structured +from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 +from zarr.core.dtype.wrapper import ZDType + +zdtype_examples: tuple[ZDType[Any, Any], ...] 
= () +for wrapper_cls in data_type_registry.contents.values(): + # The Structured dtype has to be constructed with some actual fields + if wrapper_cls is Structured: + zdtype_examples += (wrapper_cls.from_dtype(np.dtype([("a", np.float64), ("b", np.int8)])),) + elif issubclass(wrapper_cls, HasLength): + zdtype_examples += (wrapper_cls(length=1),) + elif issubclass(wrapper_cls, DateTime64 | TimeDelta64): + zdtype_examples += (wrapper_cls(unit="s", interval=10),) + else: + zdtype_examples += (wrapper_cls(),) diff --git a/tests/test_dtype/test_npy/test_common.py b/tests/test_dtype/test_npy/test_common.py index f3082d0c3b..69beae38e3 100644 --- a/tests/test_dtype/test_npy/test_common.py +++ b/tests/test_dtype/test_npy/test_common.py @@ -14,10 +14,15 @@ EndiannessNumpy, bytes_from_json, bytes_to_json, + check_json_bool, + check_json_complex_float, + check_json_complex_float_v2, + check_json_complex_float_v3, check_json_float, check_json_float_v2, check_json_float_v3, check_json_int, + check_json_str, complex_float_to_json, complex_float_to_json_v2, complex_float_to_json_v3, @@ -31,7 +36,7 @@ ) if TYPE_CHECKING: - from zarr.core.common import ZarrFormat + from zarr.core.common import JSON, ZarrFormat def nan_equal(a: object, b: object) -> bool: @@ -44,7 +49,7 @@ def nan_equal(a: object, b: object) -> bool: return a == b -json_float_v2: list[tuple[JSONFloat, float | np.floating[Any]]] = [ +json_float_v2_cases: list[tuple[JSONFloat, float | np.floating[Any]]] = [ ("Infinity", float("inf")), ("Infinity", np.inf), ("-Infinity", float("-inf")), @@ -56,7 +61,7 @@ def nan_equal(a: object, b: object) -> bool: # exactly the same as v2, for now, until we get support for the special NaN encoding defined in the # v3 spec -json_float_v3: list[tuple[JSONFloat, float | np.floating[Any]]] = [ +json_float_v3_cases: list[tuple[JSONFloat, float | np.floating[Any]]] = [ ("Infinity", float("inf")), ("Infinity", np.inf), ("-Infinity", float("-inf")), @@ -101,7 +106,7 @@ def 
test_endianness_to_numpy_str(data: str | None, expected: str) -> None: endianness_to_numpy_str(data) # type: ignore[arg-type] -@pytest.mark.parametrize(("data", "expected"), json_float_v2 + [("SHOULD_ERR", "")]) +@pytest.mark.parametrize(("data", "expected"), json_float_v2_cases + [("SHOULD_ERR", "")]) def test_float_from_json_v2(data: JSONFloat | str, expected: float | str) -> None: """ Test that float_from_json_v2 correctly converts a JSON string representation of a float to a float. @@ -115,7 +120,7 @@ def test_float_from_json_v2(data: JSONFloat | str, expected: float | str) -> Non float_from_json_v2(data) # type: ignore[arg-type] -@pytest.mark.parametrize(("data", "expected"), json_float_v3 + [("SHOULD_ERR", "")]) +@pytest.mark.parametrize(("data", "expected"), json_float_v3_cases + [("SHOULD_ERR", "")]) def test_float_from_json_v3(data: JSONFloat | str, expected: float | str) -> None: """ Test that float_from_json_v3 correctly converts a JSON string representation of a float to a float. @@ -129,7 +134,7 @@ def test_float_from_json_v3(data: JSONFloat | str, expected: float | str) -> Non float_from_json_v3(data) # type: ignore[arg-type] -@pytest.mark.parametrize(("data", "expected"), json_float_v2) +@pytest.mark.parametrize(("data", "expected"), json_float_v2_cases) def test_float_from_json(data: JSONFloat, expected: float | str, zarr_format: ZarrFormat) -> None: """ Test that float_from_json_v3 correctly converts a JSON string representation of a float to a float. @@ -144,7 +149,7 @@ def test_float_from_json(data: JSONFloat, expected: float | str, zarr_format: Za # note the order of parameters relative to the order of the parametrized variable. 
-@pytest.mark.parametrize(("expected", "data"), json_float_v2) +@pytest.mark.parametrize(("expected", "data"), json_float_v2_cases) def test_float_to_json_v2(data: float | np.floating[Any], expected: JSONFloat) -> None: """ Test that floats are JSON-encoded properly for zarr v2 @@ -154,7 +159,7 @@ def test_float_to_json_v2(data: float | np.floating[Any], expected: JSONFloat) - # note the order of parameters relative to the order of the parametrized variable. -@pytest.mark.parametrize(("expected", "data"), json_float_v3) +@pytest.mark.parametrize(("expected", "data"), json_float_v3_cases) def test_float_to_json_v3(data: float | np.floating[Any], expected: JSONFloat) -> None: """ Test that floats are JSON-encoded properly for zarr v3 @@ -188,7 +193,7 @@ def test_bytes_to_json(zarr_format: ZarrFormat) -> None: # note the order of parameters relative to the order of the parametrized variable. -@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v2) +@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v2_cases) def test_complex_to_json_v2(float_data: float | np.floating[Any], json_expected: JSONFloat) -> None: """ Test that complex numbers are correctly converted to JSON in v2 format. @@ -204,7 +209,7 @@ def test_complex_to_json_v2(float_data: float | np.floating[Any], json_expected: # note the order of parameters relative to the order of the parametrized variable. -@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3) +@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3_cases) def test_complex_to_json_v3(float_data: float | np.floating[Any], json_expected: JSONFloat) -> None: """ Test that complex numbers are correctly converted to JSON in v3 format. 
@@ -219,7 +224,7 @@ def test_complex_to_json_v3(float_data: float | np.floating[Any], json_expected: assert complex_float_to_json_v3(cplx_npy) == (json_expected, json_expected) -@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3) +@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3_cases) def test_complex_float_to_json( float_data: float | np.floating[Any], json_expected: JSONFloat, zarr_format: ZarrFormat ) -> None: @@ -272,6 +277,66 @@ def test_check_json_float(data: JSONFloat | int, zarr_format: ZarrFormat) -> Non assert observed == expected +check_json_complex_float_true_cases = ( + [0.0, 1.0], + (0.0, 1.0), + [-1.0, "NaN"], + ["Infinity", 1.0], + ["Infinity", "NaN"], +) + +check_json_complex_float_false_cases = ( + 0.0, + "foo", + [0.0], + [1.0, 2.0, 3.0], + [1.0, "_infinity_"], + {"hello": 1.0}, +) + + +@pytest.mark.parametrize("data", check_json_complex_float_true_cases) +def test_check_json_complex_float_v2_true(data: JSON) -> None: + assert check_json_complex_float_v2(data) + + +@pytest.mark.parametrize("data", check_json_complex_float_false_cases) +def test_check_json_complex_float_v2_false(data: JSON) -> None: + assert not check_json_complex_float_v2(data) + + +@pytest.mark.parametrize("data", check_json_complex_float_true_cases) +def test_check_json_complex_float_v3_true(data: JSON) -> None: + assert check_json_complex_float_v3(data) + + +@pytest.mark.parametrize("data", check_json_complex_float_false_cases) +def test_check_json_complex_float_v3_false(data: JSON) -> None: + assert not check_json_complex_float_v3(data) + + +@pytest.mark.parametrize("data", check_json_complex_float_true_cases) +def test_check_json_complex_float_true(data: JSON, zarr_format: ZarrFormat) -> None: + assert check_json_complex_float(data, zarr_format=zarr_format) + + +@pytest.mark.parametrize("data", check_json_complex_float_false_cases) +def test_check_json_complex_float_false(data: JSON, zarr_format: ZarrFormat) -> None: + assert not 
check_json_complex_float(data, zarr_format=zarr_format) + + def test_check_json_int() -> None: assert check_json_int(0) assert not check_json_int(1.0) + + +def test_check_json_str() -> None: + assert check_json_str("0") + assert not check_json_str(1.0) + + +def test_check_json_bool() -> None: + assert check_json_bool(True) + assert check_json_bool(False) + assert not check_json_bool(1.0) + assert not check_json_bool("True") From fdf17e391e6e4285d3f2b6c9ec08bbbaf4ba6260 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 7 May 2025 13:15:34 +0200 Subject: [PATCH 075/130] wip: test infrastructure for dtypes --- tests/test_array.py | 2 +- .../test_dtype/{confttest.py => conftest.py} | 6 ++ tests/test_dtype/test_npy/test_common.py | 10 +-- tests/test_dtype/test_npy/test_int.py | 32 +++++++ tests/test_dtype/test_wrapper.py | 86 +++++++++++++++++++ tests/test_dtype_registry.py | 4 +- 6 files changed, 128 insertions(+), 12 deletions(-) rename tests/test_dtype/{confttest.py => conftest.py} (80%) create mode 100644 tests/test_dtype/test_wrapper.py diff --git a/tests/test_array.py b/tests/test_array.py index 125672658a..450d1375a8 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -55,7 +55,7 @@ from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.storage import LocalStore, MemoryStore, StorePath -from .conftest import zdtype_examples +from .test_dtype.conftest import zdtype_examples if TYPE_CHECKING: from zarr.core.array_spec import ArrayConfigLike diff --git a/tests/test_dtype/confttest.py b/tests/test_dtype/conftest.py similarity index 80% rename from tests/test_dtype/confttest.py rename to tests/test_dtype/conftest.py index aba08a08c5..6e171cb435 100644 --- a/tests/test_dtype/confttest.py +++ b/tests/test_dtype/conftest.py @@ -20,3 +20,9 @@ zdtype_examples += (wrapper_cls(unit="s", interval=10),) else: zdtype_examples += (wrapper_cls(),) + + +def pytest_generate_tests(metafunc): + for fixture_name in metafunc.fixturenames: + if 
hasattr(metafunc.cls, fixture_name): + metafunc.parametrize(fixture_name, getattr(metafunc.cls, fixture_name), scope="class") diff --git a/tests/test_dtype/test_npy/test_common.py b/tests/test_dtype/test_npy/test_common.py index 69beae38e3..69a14a92b0 100644 --- a/tests/test_dtype/test_npy/test_common.py +++ b/tests/test_dtype/test_npy/test_common.py @@ -61,15 +61,7 @@ def nan_equal(a: object, b: object) -> bool: # exactly the same as v2, for now, until we get support for the special NaN encoding defined in the # v3 spec -json_float_v3_cases: list[tuple[JSONFloat, float | np.floating[Any]]] = [ - ("Infinity", float("inf")), - ("Infinity", np.inf), - ("-Infinity", float("-inf")), - ("-Infinity", -np.inf), - ("NaN", float("nan")), - ("NaN", np.nan), - (1.0, 1.0), -] +json_float_v3_cases = json_float_v2_cases @pytest.mark.parametrize( diff --git a/tests/test_dtype/test_npy/test_int.py b/tests/test_dtype/test_npy/test_int.py index e69de29bb2..a90af53c58 100644 --- a/tests/test_dtype/test_npy/test_int.py +++ b/tests/test_dtype/test_npy/test_int.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import numpy as np + +from tests.test_dtype.test_wrapper import _TestZDType +from zarr.core.dtype.npy.int import Int8 + + +class TestInt8(_TestZDType): + test_cls = Int8 + valid_dtype = (np.dtype(np.int8),) + invalid_dtype = ( + np.dtype(np.int16), + np.dtype(np.uint16), + np.dtype(np.float64), + ) + valid_json_v2 = ("|i1",) + valid_json_v3_cases = ("int8",) + invalid_json_v2 = ( + ">i1", + "int8", + "|f8", + ) + invalid_json_v3 = ( + "|i1", + "|f8", + {"name": "int8", "configuration": {"endianness": "little"}}, + ) + + def test_check_value(self) -> None: + assert self.test_cls().check_value(1) + assert not self.test_cls().check_value(["foo"]) diff --git a/tests/test_dtype/test_wrapper.py b/tests/test_dtype/test_wrapper.py new file mode 100644 index 0000000000..c6093ebb01 --- /dev/null +++ b/tests/test_dtype/test_wrapper.py @@ -0,0 +1,86 @@ +from __future__ import 
annotations + +from typing import Any, ClassVar + +import hypothesis.strategies as st +import numpy as np +from hypothesis.extra import numpy as npst + +from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType + + +def all_dtypes() -> st.SearchStrategy[np.dtype[np.generic]]: + return ( + npst.boolean_dtypes() + | npst.integer_dtypes(endianness="=") + | npst.unsigned_integer_dtypes(endianness="=") + | npst.floating_dtypes(endianness="=") + | npst.complex_number_dtypes(endianness="=") + | npst.byte_string_dtypes(endianness="=") + | npst.unicode_string_dtypes(endianness="=") + | npst.datetime64_dtypes(endianness="=") + | npst.timedelta64_dtypes(endianness="=") + ) + + +def get_classvar_attributes(cls: type) -> dict[str, Any]: + classvar_attributes = {} + for name, annotation in cls.__annotations__.items(): + if getattr(annotation, "__origin__", None) is ClassVar: + classvar_attributes[name] = getattr(cls, name) + return classvar_attributes + + +class _TestZDType: + test_cls: type[ZDType[TBaseDType, TBaseScalar]] + + valid_dtype: ClassVar[tuple[TBaseDType, ...]] = () + invalid_dtype: ClassVar[tuple[TBaseDType, ...]] = () + + valid_json_v2: ClassVar[tuple[str | dict[str, Any], ...]] = () + invalid_json_v2: ClassVar[tuple[str | dict[str, Any], ...]] = () + + valid_json_v3: ClassVar[tuple[str | dict[str, Any], ...]] = () + invalid_json_v3: ClassVar[tuple[str | dict[str, Any], ...]] = () + + def test_check_dtype_valid(self, valid_dtype: Any) -> None: + assert self.test_cls.check_dtype(valid_dtype) + + def test_check_dtype_invalid(self, invalid_dtype: Any) -> None: + assert not self.test_cls.check_dtype(invalid_dtype) + + def test_from_dtype_roundtrip(self, valid_dtype: Any) -> None: + zdtype = self.test_cls.from_dtype(valid_dtype) + assert zdtype.to_dtype() == valid_dtype + + """ @abc.abstractmethod + def test_cast_value(self, value: Any) -> None: + raise NotImplementedError + + @abc.abstractmethod + def test_check_value(self) -> None: + raise 
NotImplementedError + + @abc.abstractmethod + def test_default_value(self) -> None: + raise NotImplementedError + + @abc.abstractmethod + def test_check_json(self, value: Any) -> None: + raise NotImplementedError + + @abc.abstractmethod + def test_from_json_roundtrip_v2(self, value: Any) -> None: + raise NotImplementedError + + @abc.abstractmethod + def test_from_json_roundtrip_v3(self, value: Any) -> None: + raise NotImplementedError + + @abc.abstractmethod + def test_from_json_value_roundtrip_v2(self, value: Any) -> None: + raise NotImplementedError + + @abc.abstractmethod + def test_from_json_value_roundtrip_v3(self, value: Any) -> None: + raise NotImplementedError """ diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index 5e87945b3a..98380b86f7 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -22,13 +22,13 @@ ) from zarr.core.dtype.registry import DataTypeRegistry -from .conftest import zdtype_examples - if TYPE_CHECKING: from collections.abc import Generator from zarr.core.common import ZarrFormat +from .test_dtype.conftest import zdtype_examples + @pytest.fixture def data_type_registry_fixture() -> DataTypeRegistry: From 4afa42af137a5a5736e0dfaf9fbc5e4747abc750 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 7 May 2025 18:03:00 +0200 Subject: [PATCH 076/130] wip: use class-based tests for all dtypes --- tests/test_dtype/conftest.py | 32 ++++- tests/test_dtype/test_npy/test_bool.py | 28 ++++ tests/test_dtype/test_npy/test_complex.py | 50 +++++++ tests/test_dtype/test_npy/test_float.py | 72 ++++++++++ tests/test_dtype/test_npy/test_int.py | 162 +++++++++++++++++++++- tests/test_dtype/test_npy/test_sized.py | 131 +++++++++++++++++ tests/test_dtype/test_npy/test_string.py | 50 +++++++ tests/test_dtype/test_npy/test_time.py | 54 ++++++++ tests/test_dtype/test_wrapper.py | 24 ++-- 9 files changed, 588 insertions(+), 15 deletions(-) create mode 100644 tests/test_dtype/test_npy/test_bool.py create 
mode 100644 tests/test_dtype/test_npy/test_complex.py create mode 100644 tests/test_dtype/test_npy/test_float.py create mode 100644 tests/test_dtype/test_npy/test_sized.py create mode 100644 tests/test_dtype/test_npy/test_string.py create mode 100644 tests/test_dtype/test_npy/test_time.py diff --git a/tests/test_dtype/conftest.py b/tests/test_dtype/conftest.py index 6e171cb435..2b4bb0b685 100644 --- a/tests/test_dtype/conftest.py +++ b/tests/test_dtype/conftest.py @@ -22,7 +22,37 @@ zdtype_examples += (wrapper_cls(),) -def pytest_generate_tests(metafunc): +def pytest_generate_tests(metafunc: Any) -> None: + """ + pytest hook to parametrize class-scoped fixtures. + + This hook allows us to define class-scoped fixtures as class attributes and then + generate the parametrize calls for pytest. This allows the fixtures to be + reused across multiple tests within the same class. + + For example, if you had a regular pytest class like this: + + class TestClass: + @pytest.mark.parametrize("param_a", [1, 2, 3]) + def test_method(self, param_a): + ... + + Child classes inheriting from ``TestClass`` would not be able to override the ``param_a`` fixture + + this implementation of ``pytest_generate_tests`` allows you to define class-scoped fixtures as + class attributes, which allows the following to work: + + class TestExample: + param_a = [1, 2, 3] + + def test_example(self, param_a): + ... 
+ + # this class will have its test_example method parametrized with the values of TestB.param_a + class TestB(TestExample): + param_a = [1, 2, 100, 10] + + """ for fixture_name in metafunc.fixturenames: if hasattr(metafunc.cls, fixture_name): metafunc.parametrize(fixture_name, getattr(metafunc.cls, fixture_name), scope="class") diff --git a/tests/test_dtype/test_npy/test_bool.py b/tests/test_dtype/test_npy/test_bool.py new file mode 100644 index 0000000000..e4e5dd541e --- /dev/null +++ b/tests/test_dtype/test_npy/test_bool.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import numpy as np + +from tests.test_dtype.test_wrapper import _TestZDType +from zarr.core.dtype.npy.bool import Bool + + +class TestBool(_TestZDType): + test_cls = Bool + valid_dtype = (np.dtype(np.bool_),) + invalid_dtype = ( + np.dtype(np.int8), + np.dtype(np.float64), + np.dtype(np.uint16), + ) + valid_json_v2 = Bool._zarr_v2_names + valid_json_v3_cases = (Bool._zarr_v3_name,) + invalid_json_v2 = ( + "|b1", + "bool", + "|f8", + ) + invalid_json_v3 = ( + "|b1", + "|f8", + {"name": "bool", "configuration": {"endianness": "little"}}, + ) diff --git a/tests/test_dtype/test_npy/test_complex.py b/tests/test_dtype/test_npy/test_complex.py new file mode 100644 index 0000000000..6621d625d9 --- /dev/null +++ b/tests/test_dtype/test_npy/test_complex.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import numpy as np + +from tests.test_dtype.test_wrapper import _TestZDType +from zarr.core.dtype.npy.complex import Complex64, Complex128 + + +class TestComplex64(_TestZDType): + test_cls = Complex64 + valid_dtype = (np.dtype(">c8"), np.dtype("c16"), np.dtype("f2"), np.dtype("f4"), np.dtype("f8"), np.dtype("i1", "int8", @@ -27,6 +27,156 @@ class TestInt8(_TestZDType): {"name": "int8", "configuration": {"endianness": "little"}}, ) - def test_check_value(self) -> None: - assert self.test_cls().check_value(1) - assert not self.test_cls().check_value(["foo"]) + +class TestInt16(_TestZDType): + 
test_cls = Int16 + valid_dtype = (np.dtype(">i2"), np.dtype("i4"), np.dtype("i8"), np.dtype("u2"), np.dtype("u4"), np.dtype("u8"), np.dtype("U10"), np.dtype("U10", "i4"), ("field2", ">f8")], + [("field1", ">i8"), ("field2", ">i4")], + ) + valid_json_v3_cases = ( + { + "name": "structured", + "configuration": { + "fields": [ + ("field1", {"name": "int32", "configuration": {"endianness": "big"}}), + ("field2", {"name": "float64", "configuration": {"endianness": "big"}}), + ] + }, + }, + { + "name": "structured", + "configuration": { + "fields": [ + ("field1", {"name": "int64", "configuration": {"endianness": "big"}}), + ("field2", {"name": "int32", "configuration": {"endianness": "big"}}), + ] + }, + }, + ) + invalid_json_v2 = ( + [("field1", "|i1"), ("field2", "|f8")], + [("field1", "|S10"), ("field2", "|f8")], + ) + invalid_json_v3 = ( + { + "name": "structured", + "configuration": { + "fields": [ + ("field1", {"name": "int32", "configuration": {"endianness": "invalid"}}), + ("field2", {"name": "float64", "configuration": {"endianness": "big"}}), + ] + }, + }, + {"name": "invalid_name"}, + ) diff --git a/tests/test_dtype/test_npy/test_string.py b/tests/test_dtype/test_npy/test_string.py new file mode 100644 index 0000000000..2f77379f01 --- /dev/null +++ b/tests/test_dtype/test_npy/test_string.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import numpy as np + +from tests.test_dtype.test_wrapper import _TestZDType +from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING, VariableLengthString + +if _NUMPY_SUPPORTS_VLEN_STRING: + + class TestVariableLengthString(_TestZDType): + test_cls = VariableLengthString + valid_dtype = (np.dtypes.StringDType(),) + invalid_dtype = ( + np.dtype(np.int8), + np.dtype(np.float64), + np.dtype("|S10"), + ) + valid_json_v2 = ("|O",) + valid_json_v3_cases = ({"name": "numpy.variable_length_utf8"},) + invalid_json_v2 = ( + "|S10", + "|f8", + "invalid", + ) + invalid_json_v3 = ( + {"name": 
"numpy.variable_length_utf8", "configuration": {"invalid_key": "value"}}, + {"name": "invalid_name"}, + ) + +else: + + class TestVariableLengthString(_TestZDType): + test_cls = VariableLengthString + valid_dtype = (np.dtype("O"),) + invalid_dtype = ( + np.dtype(np.int8), + np.dtype(np.float64), + np.dtype("|S10"), + ) + valid_json_v2 = ("|O",) + valid_json_v3_cases = ({"name": "numpy.variable_length_utf8"},) + invalid_json_v2 = ( + "|S10", + "|f8", + "invalid", + ) + invalid_json_v3 = ( + {"name": "numpy.variable_length_utf8", "configuration": {"invalid_key": "value"}}, + {"name": "invalid_name"}, + ) diff --git a/tests/test_dtype/test_npy/test_time.py b/tests/test_dtype/test_npy/test_time.py new file mode 100644 index 0000000000..a5d2cce545 --- /dev/null +++ b/tests/test_dtype/test_npy/test_time.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import numpy as np + +from tests.test_dtype.test_wrapper import _TestZDType +from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 + + +class TestDateTime64(_TestZDType): + test_cls = DateTime64 + valid_dtype = (np.dtype("datetime64[10ns]"), np.dtype("datetime64[us]"), np.dtype("datetime64")) + invalid_dtype = ( + np.dtype(np.int8), + np.dtype(np.float64), + np.dtype("timedelta64[ns]"), + ) + valid_json_v2 = (">M8", ">M8[s]", " None: - assert self.test_cls.check_dtype(valid_dtype) + def test_check_dtype_valid(self, valid_dtype: object) -> None: + assert self.test_cls.check_dtype(valid_dtype) # type: ignore[arg-type] - def test_check_dtype_invalid(self, invalid_dtype: Any) -> None: - assert not self.test_cls.check_dtype(invalid_dtype) + def test_check_dtype_invalid(self, invalid_dtype: object) -> None: + assert not self.test_cls.check_dtype(invalid_dtype) # type: ignore[arg-type] def test_from_dtype_roundtrip(self, valid_dtype: Any) -> None: zdtype = self.test_cls.from_dtype(valid_dtype) assert zdtype.to_dtype() == valid_dtype + def test_from_json_roundtrip_v2(self, valid_json_v2: Any) -> None: + zdtype = 
self.test_cls.from_json(valid_json_v2, zarr_format=2) + assert zdtype.to_json(zarr_format=2) == valid_json_v2 + + def test_from_json_roundtrip_v3(self, valid_json_v3: Any) -> None: + zdtype = self.test_cls.from_json(valid_json_v3, zarr_format=3) + assert zdtype.to_json(zarr_format=3) == valid_json_v3 + """ @abc.abstractmethod def test_cast_value(self, value: Any) -> None: raise NotImplementedError From 1458aadadb8162e7326809fa8ff186024dcec91a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 8 May 2025 17:14:43 +0200 Subject: [PATCH 077/130] fill out more tests, and adjust sized dtypes --- src/zarr/core/dtype/npy/sized.py | 49 +++++++++---------- src/zarr/core/dtype/npy/time.py | 38 +++++++++------ src/zarr/core/dtype/wrapper.py | 13 +++++ tests/test_dtype/conftest.py | 2 +- tests/test_dtype/test_npy/test_bool.py | 7 ++- tests/test_dtype/test_npy/test_complex.py | 22 +++++++-- tests/test_dtype/test_npy/test_float.py | 30 ++++++++++-- tests/test_dtype/test_npy/test_int.py | 56 +++++++++++++++------- tests/test_dtype/test_npy/test_sized.py | 56 +++++++++++++++++----- tests/test_dtype/test_npy/test_string.py | 4 +- tests/test_dtype/test_npy/test_time.py | 54 ++++++++++++++++++--- tests/test_dtype/test_wrapper.py | 58 ++++++++++++----------- 12 files changed, 275 insertions(+), 114 deletions(-) diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py index d9524a4891..032a1ec5c0 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/sized.py @@ -23,11 +23,10 @@ class FixedLengthAscii(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength): dtype_cls = np.dtypes.BytesDType _zarr_v3_name = "numpy.fixed_length_ascii" - item_size_bits: ClassVar[int] = 8 @classmethod def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: - return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) + return cls(length=dtype.itemsize) def to_dtype(self) -> np.dtypes.BytesDType[int]: return self.dtype_cls(self.length) @@ -43,12 
+42,10 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: elif zarr_format == 3: return ( isinstance(data, dict) - and "name" in data + and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name - and "configuration" in data and isinstance(data["configuration"], dict) - and "length_bits" in data["configuration"] - and isinstance(data["configuration"]["length_bits"], int) + and "length_bytes" in data["configuration"] ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -58,7 +55,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: elif zarr_format == 3: return { "name": self._zarr_v3_name, - "configuration": {"length_bits": self.length * self.item_size_bits}, + "configuration": {"length_bytes": self.length}, } raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -67,7 +64,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: - return cls(length=data["configuration"]["length_bits"] // cls.item_size_bits) # type: ignore[arg-type, index, call-overload, operator] + return cls(length=data["configuration"]["length_bytes"]) # type: ignore[arg-type, index, call-overload] raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.bytes_: @@ -94,12 +91,11 @@ class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength): # it cannot be used to create instances of the dtype # so we have to tell mypy to ignore this here dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] - _zarr_v3_name = "numpy.void" - item_size_bits: ClassVar[int] = 8 + _zarr_v3_name = "numpy.fixed_length_bytes" @classmethod def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: - return cls(length=dtype.itemsize // (cls.item_size_bits // 8)) + return 
cls(length=dtype.itemsize) def to_dtype(self) -> np.dtypes.VoidDType[int]: # Numpy does not allow creating a void type @@ -114,9 +110,10 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: elif zarr_format == 3: return ( isinstance(data, dict) - and "name" in data - and isinstance(data["name"], str) - and (re.match(r"^r\d+$", data["name"]) is not None) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) == {"length_bytes"} ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -124,7 +121,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: if zarr_format == 2: return self.to_dtype().str elif zarr_format == 3: - return {"name": f"r{self.length * self.item_size_bits}"} + return {"name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}} raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod @@ -132,7 +129,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: - return cls(length=int(data["name"][1:]) // cls.item_size_bits) # type: ignore[arg-type, index, call-overload] + return cls(length=data["configuration"]["length_bytes"]) # type: ignore[arg-type, index, call-overload] raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod @@ -178,13 +175,13 @@ def _cast_value_unsafe(self, value: object) -> np.void: class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength): dtype_cls = np.dtypes.StrDType _zarr_v3_name = "numpy.fixed_length_ucs4" - item_size_bits: ClassVar[int] = 32 # UCS4 is 32 bits per code point + item_size_bytes: ClassVar[int] = 4 # UCS4 is 4 bytes per code point @classmethod def _from_dtype_unsafe(cls, 
dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls( - length=dtype.itemsize // (cls.item_size_bits // 8), + length=dtype.itemsize // (cls.item_size_bytes), endianness=endianness_from_numpy_str(byte_order), ) @@ -203,12 +200,12 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: elif zarr_format == 3: return ( isinstance(data, dict) - and "name" in data + and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name and "configuration" in data and isinstance(data["configuration"], dict) - and "length_bits" in data["configuration"] - and isinstance(data["configuration"]["length_bits"], int) + and set(data["configuration"].keys()) == {"length_bytes"} + and isinstance(data["configuration"]["length_bytes"], int) ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -218,7 +215,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: elif zarr_format == 3: return { "name": self._zarr_v3_name, - "configuration": {"length_bits": self.length * self.item_size_bits}, + "configuration": {"length_bytes": self.length * self.item_size_bytes}, } raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -227,7 +224,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: - return cls(length=data["configuration"]["length_bits"] // cls.item_size_bits) # type: ignore[arg-type, index, call-overload, operator] + return cls(length=data["configuration"]["length_bytes"] // cls.item_size_bytes) # type: ignore[arg-type, index, call-overload, operator] raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.str_: @@ -344,7 +341,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: for f_name, f_dtype in data ) ) - elif 
zarr_format == 3: # noqa: SIM102 + elif zarr_format == 3: if isinstance(data, dict) and "configuration" in data: config = data["configuration"] if isinstance(config, dict) and "fields" in config: @@ -354,6 +351,10 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: for f_name, f_dtype in meta_fields ) return cls(fields=fields) + else: + raise TypeError(f"Invalid type: {data}. Expected a dictionary.") + else: + raise TypeError(f"Invalid type: {data}. Expected a dictionary.") raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index f691bd88c8..b8fc85b297 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -105,20 +105,31 @@ class TimeDTypeBase(ZDType[_BaseTimeDType_co, _BaseTimeScalar], HasEndianness): # because the particular numpy dtype we are wrapping does not allow direct construction via # cls.dtype_cls() _numpy_name: ClassVar[_DTypeName] - interval: int + scale_factor: int unit: DateTimeUnit + def __post_init__(self) -> None: + if self.scale_factor < 1: + raise ValueError(f"scale_factor must be > 0, got {self.scale_factor}.") + if self.scale_factor >= 2**31: + raise ValueError(f"scale_factor must be < 2147483648, got {self.scale_factor}.") + if self.unit not in get_args(DateTimeUnit): + raise ValueError(f"unit must be one of {get_args(DateTimeUnit)}, got {self.unit!r}.") + @classmethod def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: - unit, interval = np.datetime_data(dtype.name) + unit, scale_factor = np.datetime_data(dtype.name) + unit = cast("DateTimeUnit", unit) byteorder = cast("EndiannessNumpy", dtype.byteorder) - return cls(unit=unit, interval=interval, endianness=endianness_from_numpy_str(byteorder)) # type: ignore[arg-type] + return cls( + unit=unit, scale_factor=scale_factor, 
endianness=endianness_from_numpy_str(byteorder) + ) def to_dtype(self) -> _BaseTimeDType_co: # Numpy does not allow creating datetime64 or timedelta64 via # np.dtypes.{dtype_name}() # so we use np.dtype with a formatted string. - dtype_string = f"{self._numpy_name}[{self.interval}{self.unit}]" + dtype_string = f"{self._numpy_name}[{self.scale_factor}{self.unit}]" return np.dtype(dtype_string).newbyteorder(endianness_to_numpy_str(self.endianness)) # type: ignore[return-value] @classmethod @@ -127,8 +138,8 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: unit = data["configuration"]["unit"] # type: ignore[index, call-overload] - interval = data["configuration"]["interval"] # type: ignore[index, call-overload] - return cls(unit=unit, interval=interval) # type: ignore[arg-type] + scale_factor = data["configuration"]["scale_factor"] # type: ignore[index, call-overload] + return cls(unit=unit, scale_factor=scale_factor) # type: ignore[arg-type] raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json(self, zarr_format: ZarrFormat) -> JSON: @@ -137,7 +148,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: elif zarr_format == 3: return { "name": self._zarr_v3_name, - "configuration": {"unit": self.unit, "interval": self.interval}, + "configuration": {"unit": self.unit, "scale_factor": self.scale_factor}, } raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -166,7 +177,7 @@ class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], Has _zarr_v3_name = "numpy.timedelta64" _zarr_v2_names = (">m8", " np.timedelta64: @@ -174,7 +185,7 @@ def default_value(self) -> np.timedelta64: def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: if check_json_int(data): - return self.to_dtype().type(data, f"{self.interval}{self.unit}") + return 
self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") raise TypeError(f"Invalid type: {data}. Expected an integer.") def _cast_value_unsafe(self, value: object) -> np.timedelta64: @@ -202,8 +213,7 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: and data["name"] == cls._zarr_v3_name and set(data.keys()) == {"name", "configuration"} and isinstance(data["configuration"], dict) - and set(data["configuration"].keys()) == {"unit", "interval"} - and data["configuration"]["unit"] in get_args(DateTimeUnit) + and set(data["configuration"].keys()) == {"unit", "scale_factor"} ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -215,14 +225,14 @@ class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEnd _zarr_v2_names = (">M8", " np.datetime64: return np.datetime64("NaT") def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: if check_json_int(data): - return self.to_dtype().type(data, f"{self.interval}{self.unit}") + return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") raise TypeError(f"Invalid type: {data}. 
Expected an integer.") def _cast_value_unsafe(self, value: object) -> np.datetime64: @@ -248,7 +258,7 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name - and set(data["configuration"].keys()) == {"unit", "interval"} + and set(data["configuration"].keys()) == {"unit", "scale_factor"} and data["configuration"]["unit"] in get_args(DateTimeUnit) ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index be51db3ae5..0600fab80b 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -22,6 +22,7 @@ from __future__ import annotations +import warnings from abc import ABC, abstractmethod from dataclasses import dataclass from typing import TYPE_CHECKING, ClassVar, Generic, Self, TypeGuard, TypeVar @@ -329,3 +330,15 @@ def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScal The native scalar value. """ ... + + +def v3_unstable_dtype_warning(dtype: ZDType[TBaseDType, TBaseScalar]) -> None: + msg = ( + f"You are using a data type ({dtype}) that does not have a stable Zarr V3 specification." + "Be advised that arrays stored with this data type may be unreadable by other Zarr " + "libraries, and possibly future versions of Zarr-Python as well. " + "Use this data type at your own risk." + "See https://github.com/zarr-developers/zarr-extensions/tree/main/data-types for a list" + "of data types with a stable Zarr V3 specification." 
+ ) + warnings.warn(msg, category=FutureWarning, stacklevel=2) diff --git a/tests/test_dtype/conftest.py b/tests/test_dtype/conftest.py index 2b4bb0b685..d8ef17a039 100644 --- a/tests/test_dtype/conftest.py +++ b/tests/test_dtype/conftest.py @@ -17,7 +17,7 @@ elif issubclass(wrapper_cls, HasLength): zdtype_examples += (wrapper_cls(length=1),) elif issubclass(wrapper_cls, DateTime64 | TimeDelta64): - zdtype_examples += (wrapper_cls(unit="s", interval=10),) + zdtype_examples += (wrapper_cls(unit="s", scale_factor=10),) else: zdtype_examples += (wrapper_cls(),) diff --git a/tests/test_dtype/test_npy/test_bool.py b/tests/test_dtype/test_npy/test_bool.py index e4e5dd541e..1040683846 100644 --- a/tests/test_dtype/test_npy/test_bool.py +++ b/tests/test_dtype/test_npy/test_bool.py @@ -14,8 +14,8 @@ class TestBool(_TestZDType): np.dtype(np.float64), np.dtype(np.uint16), ) - valid_json_v2 = Bool._zarr_v2_names - valid_json_v3_cases = (Bool._zarr_v3_name,) + valid_json_v2 = ("|b1",) + valid_json_v3 = ("bool",) invalid_json_v2 = ( "|b1", "bool", @@ -26,3 +26,6 @@ class TestBool(_TestZDType): "|f8", {"name": "bool", "configuration": {"endianness": "little"}}, ) + + scalar_v2_params = (("|b1", True), ("|b1", False)) + scalar_v3_params = (("bool", True), ("bool", False)) diff --git a/tests/test_dtype/test_npy/test_complex.py b/tests/test_dtype/test_npy/test_complex.py index 6621d625d9..aac514028d 100644 --- a/tests/test_dtype/test_npy/test_complex.py +++ b/tests/test_dtype/test_npy/test_complex.py @@ -14,8 +14,8 @@ class TestComplex64(_TestZDType): np.dtype(np.float64), np.dtype(np.complex128), ) - valid_json_v2 = Complex64._zarr_v2_names - valid_json_v3_cases = (Complex64._zarr_v3_name,) + valid_json_v2 = (">c8", ">c8") + valid_json_v3 = ("complex64",) invalid_json_v2 = ( "|c8", "complex64", @@ -27,6 +27,13 @@ class TestComplex64(_TestZDType): {"name": "complex64", "configuration": {"endianness": "little"}}, ) + scalar_v2_params = ((">c8", (1.0, 1.0)), ("c8", (0, "NaN"))) + 
scalar_v3_params = ( + ("complex64", (1.0, 1.0)), + ("complex64", (-1.0, "Infinity")), + ("complex64", (0, "NaN")), + ) + class TestComplex128(_TestZDType): test_cls = Complex128 @@ -36,8 +43,8 @@ class TestComplex128(_TestZDType): np.dtype(np.float64), np.dtype(np.complex64), ) - valid_json_v2 = Complex128._zarr_v2_names - valid_json_v3_cases = (Complex128._zarr_v3_name,) + valid_json_v2 = (">c16", "c16", (1.0, 1.0)), ("c16", (0, "NaN"))) + scalar_v3_params = ( + ("complex128", (1.0, 1.0)), + ("complex128", (-1.0, "Infinity")), + ("complex128", (0, "NaN")), + ) diff --git a/tests/test_dtype/test_npy/test_float.py b/tests/test_dtype/test_npy/test_float.py index a9de0145c6..232ed1e32c 100644 --- a/tests/test_dtype/test_npy/test_float.py +++ b/tests/test_dtype/test_npy/test_float.py @@ -15,7 +15,7 @@ class TestFloat16(_TestZDType): np.dtype(np.float32), ) valid_json_v2 = Float16._zarr_v2_names - valid_json_v3_cases = (Float16._zarr_v3_name,) + valid_json_v3 = (Float16._zarr_v3_name,) invalid_json_v2 = ( "|f2", "float16", @@ -27,6 +27,14 @@ class TestFloat16(_TestZDType): {"name": "float16", "configuration": {"endianness": "little"}}, ) + scalar_v2_params = ((">f2", 1.0), ("f2", "Infinity")) + scalar_v3_params = ( + ("float16", 1.0), + ("float16", -1.0), + ("float16", "NaN"), + ("float16", "Infinity"), + ) + class TestFloat32(_TestZDType): test_cls = Float32 @@ -37,7 +45,7 @@ class TestFloat32(_TestZDType): np.dtype(np.float64), ) valid_json_v2 = Float32._zarr_v2_names - valid_json_v3_cases = (Float32._zarr_v3_name,) + valid_json_v3 = (Float32._zarr_v3_name,) invalid_json_v2 = ( "|f4", "float32", @@ -49,6 +57,14 @@ class TestFloat32(_TestZDType): {"name": "float32", "configuration": {"endianness": "little"}}, ) + scalar_v2_params = ((">f4", 1.0), ("f4", "Infinity")) + scalar_v3_params = ( + ("float32", 1.0), + ("float32", -1.0), + ("float32", "NaN"), + ("float32", "Infinity"), + ) + class TestFloat64(_TestZDType): test_cls = Float64 @@ -59,7 +75,7 @@ class 
TestFloat64(_TestZDType): np.dtype(np.float32), ) valid_json_v2 = Float64._zarr_v2_names - valid_json_v3_cases = (Float64._zarr_v3_name,) + valid_json_v3 = (Float64._zarr_v3_name,) invalid_json_v2 = ( "|f8", "float64", @@ -70,3 +86,11 @@ class TestFloat64(_TestZDType): "|i1", {"name": "float64", "configuration": {"endianness": "little"}}, ) + + scalar_v2_params = ((">f8", 1.0), ("f8", "Infinity")) + scalar_v3_params = ( + ("float64", 1.0), + ("float64", -1.0), + ("float64", "NaN"), + ("float64", "Infinity"), + ) diff --git a/tests/test_dtype/test_npy/test_int.py b/tests/test_dtype/test_npy/test_int.py index 2f149ff58f..99f698fc8e 100644 --- a/tests/test_dtype/test_npy/test_int.py +++ b/tests/test_dtype/test_npy/test_int.py @@ -14,8 +14,8 @@ class TestInt8(_TestZDType): np.dtype(np.uint16), np.dtype(np.float64), ) - valid_json_v2 = Int8._zarr_v2_names - valid_json_v3_cases = (Int8._zarr_v3_name,) + valid_json_v2 = ("|i1",) + valid_json_v3 = ("int8",) invalid_json_v2 = ( ">i1", "int8", @@ -27,6 +27,9 @@ class TestInt8(_TestZDType): {"name": "int8", "configuration": {"endianness": "little"}}, ) + scalar_v2_params = (("|i1", 1), ("|i1", -1)) + scalar_v3_params = (("int8", 1), ("int8", -1)) + class TestInt16(_TestZDType): test_cls = Int16 @@ -36,8 +39,8 @@ class TestInt16(_TestZDType): np.dtype(np.uint16), np.dtype(np.float64), ) - valid_json_v2 = Int16._zarr_v2_names - valid_json_v3_cases = (Int16._zarr_v3_name,) + valid_json_v2 = (">i2", "i2", -1)) + scalar_v3_params = (("int16", 1), ("int16", -1)) + class TestInt32(_TestZDType): test_cls = Int32 @@ -58,8 +64,8 @@ class TestInt32(_TestZDType): np.dtype(np.uint16), np.dtype(np.float64), ) - valid_json_v2 = Int32._zarr_v2_names - valid_json_v3_cases = (Int32._zarr_v3_name,) + valid_json_v2 = (">i4", "i4", -1)) + scalar_v3_params = (("int32", 1), ("int32", -1)) + class TestInt64(_TestZDType): test_cls = Int64 @@ -80,8 +89,8 @@ class TestInt64(_TestZDType): np.dtype(np.uint16), np.dtype(np.float64), ) - valid_json_v2 = 
Int64._zarr_v2_names - valid_json_v3_cases = (Int64._zarr_v3_name,) + valid_json_v2 = (">i8", "i8", -1)) + scalar_v3_params = (("int64", 1), ("int64", -1)) + class TestUInt8(_TestZDType): test_cls = UInt8 @@ -102,8 +114,8 @@ class TestUInt8(_TestZDType): np.dtype(np.int16), np.dtype(np.float64), ) - valid_json_v2 = UInt8._zarr_v2_names - valid_json_v3_cases = (UInt8._zarr_v3_name,) + valid_json_v2 = ("|u1",) + valid_json_v3 = ("uint8",) invalid_json_v2 = ( "|u1", "uint8", @@ -115,6 +127,9 @@ class TestUInt8(_TestZDType): {"name": "uint8", "configuration": {"endianness": "little"}}, ) + scalar_v2_params = (("|u1", 1), ("|u1", 0)) + scalar_v3_params = (("uint8", 1), ("uint8", 0)) + class TestUInt16(_TestZDType): test_cls = UInt16 @@ -124,8 +139,8 @@ class TestUInt16(_TestZDType): np.dtype(np.int16), np.dtype(np.float64), ) - valid_json_v2 = UInt16._zarr_v2_names - valid_json_v3_cases = (UInt16._zarr_v3_name,) + valid_json_v2 = (">u2", "u2", 0)) + scalar_v3_params = (("uint16", 1), ("uint16", 0)) + class TestUInt32(_TestZDType): test_cls = UInt32 @@ -146,8 +164,8 @@ class TestUInt32(_TestZDType): np.dtype(np.int16), np.dtype(np.float64), ) - valid_json_v2 = UInt32._zarr_v2_names - valid_json_v3_cases = (UInt32._zarr_v3_name,) + valid_json_v2 = (">u4", "u4", 0)) + scalar_v3_params = (("uint32", 1), ("uint32", 0)) + class TestUInt64(_TestZDType): test_cls = UInt64 @@ -168,8 +189,8 @@ class TestUInt64(_TestZDType): np.dtype(np.int16), np.dtype(np.float64), ) - valid_json_v2 = UInt64._zarr_v2_names - valid_json_v3_cases = (UInt64._zarr_v3_name,) + valid_json_v2 = (">u8", "u8", 0)) + scalar_v3_params = (("uint64", 1), ("uint64", 0)) diff --git a/tests/test_dtype/test_npy/test_sized.py b/tests/test_dtype/test_npy/test_sized.py index 887d734fd3..17f4b2af2d 100644 --- a/tests/test_dtype/test_npy/test_sized.py +++ b/tests/test_dtype/test_npy/test_sized.py @@ -20,9 +20,7 @@ class TestFixedLengthAscii(_TestZDType): np.dtype("|U10"), ) valid_json_v2 = ("|S0", "|S2", "|S4") - 
valid_json_v3_cases = ( - {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": 80}}, - ) + valid_json_v3 = ({"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": 10}},) invalid_json_v2 = ( "|S", "|U10", @@ -33,6 +31,13 @@ class TestFixedLengthAscii(_TestZDType): {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": "invalid"}}, ) + scalar_v2_params = (("|S0", ""), ("|S2", "YWI="), ("|S4", "YWJjZA==")) + scalar_v3_params = ( + ({"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": 0}}, ""), + ({"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": 16}}, "YWI="), + ({"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": 32}}, "YWJjZA=="), + ) + class TestFixedLengthBytes(_TestZDType): test_cls = FixedLengthBytes @@ -43,17 +48,28 @@ class TestFixedLengthBytes(_TestZDType): np.dtype("|S10"), ) valid_json_v2 = ("|V10",) - valid_json_v3_cases = ({"name": "r80"},) + valid_json_v3 = ( + {"name": "numpy.fixed_length_bytes", "configuration": {"length_bytes": 0}}, + {"name": "numpy.fixed_length_bytes", "configuration": {"length_bytes": 8}}, + ) + invalid_json_v2 = ( "|V", "|S10", "|f8", ) invalid_json_v3 = ( - {"name": "r0"}, + {"name": "r10"}, {"name": "r-80"}, ) + scalar_v2_params = (("|V0", ""), ("|V2", "YWI="), ("|V4", "YWJjZA==")) + scalar_v3_params = ( + ({"name": "numpy.fixed_length_bytes", "configuration": {"length_bytes": 2}}, ""), + ({"name": "numpy.fixed_length_bytes", "configuration": {"length_bytes": 2}}, "YWI="), + ({"name": "numpy.fixed_length_bytes", "configuration": {"length_bytes": 4}}, "YWJjZA=="), + ) + class TestFixedLengthUnicode(_TestZDType): test_cls = FixedLengthUnicode @@ -64,9 +80,7 @@ class TestFixedLengthUnicode(_TestZDType): np.dtype("|S10"), ) valid_json_v2 = (">U10", "U0", ""), ("i4"), ("field2", ">f8")], [("field1", ">i8"), ("field2", ">i4")], ) - valid_json_v3_cases = ( + valid_json_v3 = ( { "name": "structured", "configuration": { "fields": [ - 
("field1", {"name": "int32", "configuration": {"endianness": "big"}}), - ("field2", {"name": "float64", "configuration": {"endianness": "big"}}), + ("field1", "int32"), + ("field2", "float64"), ] }, }, @@ -107,8 +128,17 @@ class TestStructured(_TestZDType): "name": "structured", "configuration": { "fields": [ - ("field1", {"name": "int64", "configuration": {"endianness": "big"}}), - ("field2", {"name": "int32", "configuration": {"endianness": "big"}}), + ( + "field1", + { + "name": "numpy.datetime64", + "configuration": {"unit": "s", "scale_factor": 1}, + }, + ), + ( + "field2", + {"name": "numpy.fixed_length_ucs4", "configuration": {"length_bytes": 32}}, + ), ] }, }, diff --git a/tests/test_dtype/test_npy/test_string.py b/tests/test_dtype/test_npy/test_string.py index 2f77379f01..fbb0aaa86d 100644 --- a/tests/test_dtype/test_npy/test_string.py +++ b/tests/test_dtype/test_npy/test_string.py @@ -16,7 +16,7 @@ class TestVariableLengthString(_TestZDType): np.dtype("|S10"), ) valid_json_v2 = ("|O",) - valid_json_v3_cases = ({"name": "numpy.variable_length_utf8"},) + valid_json_v3 = ("numpy.variable_length_utf8",) invalid_json_v2 = ( "|S10", "|f8", @@ -38,7 +38,7 @@ class TestVariableLengthString(_TestZDType): np.dtype("|S10"), ) valid_json_v2 = ("|O",) - valid_json_v3_cases = ({"name": "numpy.variable_length_utf8"},) + valid_json_v3 = ("numpy.variable_length_utf8",) invalid_json_v2 = ( "|S10", "|f8", diff --git a/tests/test_dtype/test_npy/test_time.py b/tests/test_dtype/test_npy/test_time.py index a5d2cce545..2a8ff6ac98 100644 --- a/tests/test_dtype/test_npy/test_time.py +++ b/tests/test_dtype/test_npy/test_time.py @@ -1,6 +1,9 @@ from __future__ import annotations +import re + import numpy as np +import pytest from tests.test_dtype.test_wrapper import _TestZDType from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 @@ -15,9 +18,9 @@ class TestDateTime64(_TestZDType): np.dtype("timedelta64[ns]"), ) valid_json_v2 = (">M8", ">M8[s]", " None: + """ + Test that an 
invalid unit raises a ValueError. + """ + unit = "invalid" + msg = f"unit must be one of ('Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'μs', 'ns', 'ps', 'fs', 'as', 'generic'), got {unit!r}." + with pytest.raises(ValueError, match=re.escape(msg)): + DateTime64(unit=unit) # type: ignore[arg-type] + with pytest.raises(ValueError, match=re.escape(msg)): + TimeDelta64(unit=unit) # type: ignore[arg-type] + + +def test_time_scale_factor_too_low() -> None: + """ + Test that an invalid unit raises a ValueError. + """ + scale_factor = 0 + msg = f"scale_factor must be > 0, got {scale_factor}." + with pytest.raises(ValueError, match=msg): + DateTime64(scale_factor=scale_factor) + with pytest.raises(ValueError, match=msg): + TimeDelta64(scale_factor=scale_factor) + + +def test_time_scale_factor_too_high() -> None: + """ + Test that an invalid unit raises a ValueError. + """ + scale_factor = 2**31 + msg = f"scale_factor must be < 2147483648, got {scale_factor}." + with pytest.raises(ValueError, match=msg): + DateTime64(scale_factor=scale_factor) + with pytest.raises(ValueError, match=msg): + TimeDelta64(scale_factor=scale_factor) diff --git a/tests/test_dtype/test_wrapper.py b/tests/test_dtype/test_wrapper.py index bbe74d9a0f..49e05340e0 100644 --- a/tests/test_dtype/test_wrapper.py +++ b/tests/test_dtype/test_wrapper.py @@ -1,34 +1,9 @@ from __future__ import annotations -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, ClassVar -import hypothesis.strategies as st -import numpy as np -from hypothesis.extra import numpy as npst - -from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType - - -def all_dtypes() -> st.SearchStrategy[np.dtype[np.generic]]: - return ( - npst.boolean_dtypes() - | npst.integer_dtypes(endianness="=") - | npst.unsigned_integer_dtypes(endianness="=") - | npst.floating_dtypes(endianness="=") - | npst.complex_number_dtypes(endianness="=") - | npst.byte_string_dtypes(endianness="=") - | 
npst.unicode_string_dtypes(endianness="=") - | npst.datetime64_dtypes(endianness="=") - | npst.timedelta64_dtypes(endianness="=") - ) - - -def get_classvar_attributes(cls: type) -> dict[str, Any]: - classvar_attributes = {} - for name, annotation in cls.__annotations__.items(): - if getattr(annotation, "__origin__", None) is ClassVar: - classvar_attributes[name] = getattr(cls, name) - return classvar_attributes +if TYPE_CHECKING: + from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType class _TestZDType: @@ -43,6 +18,13 @@ class _TestZDType: valid_json_v3: ClassVar[tuple[str | dict[str, object], ...]] = () invalid_json_v3: ClassVar[tuple[str | dict[str, object], ...]] = () + # for testing scalar round-trip serialization, we need a tuple of (data type json, scalar json) + # pairs. the first element of the pair is used to create a dtype instance, and the second + # element is the json serialization of the scalar that we want to round-trip. + + scalar_v2_params: ClassVar[tuple[tuple[Any, Any], ...]] = () + scalar_v3_params: ClassVar[tuple[tuple[Any, Any], ...]] = () + def test_check_dtype_valid(self, valid_dtype: object) -> None: assert self.test_cls.check_dtype(valid_dtype) # type: ignore[arg-type] @@ -61,6 +43,26 @@ def test_from_json_roundtrip_v3(self, valid_json_v3: Any) -> None: zdtype = self.test_cls.from_json(valid_json_v3, zarr_format=3) assert zdtype.to_json(zarr_format=3) == valid_json_v3 + def test_scalar_roundtrip_v2(self, scalar_v2_params: Any) -> None: + dtype_json, scalar_json = scalar_v2_params + zdtype = self.test_cls.from_json(dtype_json, zarr_format=2) + scalar = zdtype.from_json_value(scalar_json, zarr_format=2) + assert self._scalar_equals(scalar_json, zdtype.to_json_value(scalar, zarr_format=2)) + + def test_scalar_roundtrip_v3(self, scalar_v3_params: Any) -> None: + dtype_json, scalar_json = scalar_v3_params + zdtype = self.test_cls.from_json(dtype_json, zarr_format=3) + scalar = zdtype.from_json_value(scalar_json, zarr_format=3) + 
assert self._scalar_equals(scalar_json, zdtype.to_json_value(scalar, zarr_format=3)) + + @staticmethod + def _scalar_equals(a: object, b: object) -> bool: + """ + Compare two scalars for equality. Subclasses that test dtypes with scalars that don't allow + simple equality like nans should override this method. + """ + return a == b + """ @abc.abstractmethod def test_cast_value(self, value: Any) -> None: raise NotImplementedError From aa11df4425d7b3ee6f5af397f774db564b78b5a8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 12 May 2025 13:37:52 +0200 Subject: [PATCH 078/130] wip: json schema test --- tests/test_dtype/conftest.py | 10 +++++- tests/test_dtype/test_wrapper.py | 61 ++++++++++---------------------- 2 files changed, 28 insertions(+), 43 deletions(-) diff --git a/tests/test_dtype/conftest.py b/tests/test_dtype/conftest.py index d8ef17a039..9c7825c0d1 100644 --- a/tests/test_dtype/conftest.py +++ b/tests/test_dtype/conftest.py @@ -53,6 +53,14 @@ class TestB(TestExample): param_a = [1, 2, 100, 10] """ + # Iterate over all the fixtures defined in the class + # and parametrize them with the values defined in the class + # This allows us to define class-scoped fixtures as class attributes + # and then generate the parametrize calls for pytest for fixture_name in metafunc.fixturenames: if hasattr(metafunc.cls, fixture_name): - metafunc.parametrize(fixture_name, getattr(metafunc.cls, fixture_name), scope="class") + params = getattr(metafunc.cls, fixture_name) + if len(params) == 0: + msg = f"{metafunc.cls}.{fixture_name} is empty. Please provide a non-empty sequence of values." 
+ raise ValueError(msg) + metafunc.parametrize(fixture_name, params, scope="class") diff --git a/tests/test_dtype/test_wrapper.py b/tests/test_dtype/test_wrapper.py index 49e05340e0..defd3fffc5 100644 --- a/tests/test_dtype/test_wrapper.py +++ b/tests/test_dtype/test_wrapper.py @@ -5,6 +5,23 @@ if TYPE_CHECKING: from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType +import pytest +import requests + + +class _TestZDTypeSchema: + # subclasses define the URL for the schema, if available + schema_url: ClassVar[str] = "" + + @pytest.fixture(scope="class") + def get_schema(self) -> object: + response = requests.get(self.schema_url) + response.raise_for_status() + return json_schema.loads(response.text) + + def test_schema(self, schema: json_schema.Schema) -> None: + assert schema.is_valid(self.test_cls.to_json(zarr_format=2)) + class _TestZDType: test_cls: type[ZDType[TBaseDType, TBaseScalar]] @@ -47,50 +64,10 @@ def test_scalar_roundtrip_v2(self, scalar_v2_params: Any) -> None: dtype_json, scalar_json = scalar_v2_params zdtype = self.test_cls.from_json(dtype_json, zarr_format=2) scalar = zdtype.from_json_value(scalar_json, zarr_format=2) - assert self._scalar_equals(scalar_json, zdtype.to_json_value(scalar, zarr_format=2)) + assert scalar_json == zdtype.to_json_value(scalar, zarr_format=2) def test_scalar_roundtrip_v3(self, scalar_v3_params: Any) -> None: dtype_json, scalar_json = scalar_v3_params zdtype = self.test_cls.from_json(dtype_json, zarr_format=3) scalar = zdtype.from_json_value(scalar_json, zarr_format=3) - assert self._scalar_equals(scalar_json, zdtype.to_json_value(scalar, zarr_format=3)) - - @staticmethod - def _scalar_equals(a: object, b: object) -> bool: - """ - Compare two scalars for equality. Subclasses that test dtypes with scalars that don't allow - simple equality like nans should override this method. 
- """ - return a == b - - """ @abc.abstractmethod - def test_cast_value(self, value: Any) -> None: - raise NotImplementedError - - @abc.abstractmethod - def test_check_value(self) -> None: - raise NotImplementedError - - @abc.abstractmethod - def test_default_value(self) -> None: - raise NotImplementedError - - @abc.abstractmethod - def test_check_json(self, value: Any) -> None: - raise NotImplementedError - - @abc.abstractmethod - def test_from_json_roundtrip_v2(self, value: Any) -> None: - raise NotImplementedError - - @abc.abstractmethod - def test_from_json_roundtrip_v3(self, value: Any) -> None: - raise NotImplementedError - - @abc.abstractmethod - def test_from_json_value_roundtrip_v2(self, value: Any) -> None: - raise NotImplementedError - - @abc.abstractmethod - def test_from_json_value_roundtrip_v3(self, value: Any) -> None: - raise NotImplementedError """ + assert scalar_json == zdtype.to_json_value(scalar, zarr_format=3) From 52518c24fa96a4532e62b4d996d24540e6bc4e63 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 13:04:20 +0200 Subject: [PATCH 079/130] add casting tests --- src/zarr/core/dtype/npy/bool.py | 5 +- src/zarr/core/dtype/npy/complex.py | 4 +- src/zarr/core/dtype/npy/float.py | 4 +- src/zarr/core/dtype/npy/int.py | 4 +- src/zarr/core/dtype/npy/sized.py | 37 ++-- src/zarr/core/dtype/npy/time.py | 16 +- src/zarr/core/dtype/wrapper.py | 8 +- tests/test_dtype/test_dtype.py | 248 ---------------------- tests/test_dtype/test_npy/test_bool.py | 13 +- tests/test_dtype/test_npy/test_complex.py | 49 ++++- tests/test_dtype/test_npy/test_float.py | 76 +++++-- tests/test_dtype/test_npy/test_int.py | 72 +++++-- tests/test_dtype/test_npy/test_sized.py | 79 +++++-- tests/test_dtype/test_npy/test_string.py | 30 ++- tests/test_dtype/test_npy/test_time.py | 63 +++++- tests/test_dtype/test_wrapper.py | 38 ++-- tests/test_properties.py | 1 + 17 files changed, 381 insertions(+), 366 deletions(-) diff --git a/src/zarr/core/dtype/npy/bool.py 
b/src/zarr/core/dtype/npy/bool.py index 776acf4f8c..c80033c54e 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -101,14 +101,11 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: """ if check_json_bool(data): return self._cast_value_unsafe(data) - raise TypeError(f"Invalid type: {data}. Expected a boolean.") + raise TypeError(f"Invalid type: {data}. Expected a boolean.") # pragma: no cover def check_value(self, data: object) -> bool: # Anything can become a bool return True - def cast_value(self, value: object) -> np.bool_: - return self._cast_value_unsafe(value) - def _cast_value_unsafe(self, value: object) -> np.bool_: return np.bool_(value) diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index 6e19266660..fab4ca9893 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -84,9 +84,7 @@ def check_value(self, value: object) -> bool: return isinstance(value, ComplexLike) def _cast_value_unsafe(self, value: object) -> TComplexScalar_co: - if self.check_value(value): - return self.to_dtype().type(value) # type: ignore[arg-type, return-value] - raise TypeError(f"Invalid type: {value}. Expected a value castable to a complex scalar.") + return self.to_dtype().type(value) # type: ignore[arg-type, return-value] def default_value(self) -> TComplexScalar_co: """ diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index 15baaaadaa..bedd6a4751 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -76,9 +76,7 @@ def check_value(self, value: object) -> TypeGuard[FloatLike]: return isinstance(value, FloatLike) def _cast_value_unsafe(self, value: object) -> TFloatScalar_co: - if self.check_value(value): - return self.to_dtype().type(value) # type: ignore[return-value] - raise TypeError(f"Invalid type: {value}. 
Expected a value castable to a float.") + return self.to_dtype().type(value) # type: ignore[return-value, arg-type] def default_value(self) -> TFloatScalar_co: """ diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index 7da7245162..78d9499243 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -71,9 +71,7 @@ def check_value(self, value: object) -> TypeGuard[IntLike]: return isinstance(value, IntLike) def _cast_value_unsafe(self, value: object) -> TIntScalar_co: - if self.check_value(value): - return self.to_dtype().type(value) # type: ignore[return-value] - raise TypeError(f"Invalid type: {value}. Expected a value castable to an integer.") + return self.to_dtype().type(value) # type: ignore[return-value, arg-type] def default_value(self) -> TIntScalar_co: """ diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py index 032a1ec5c0..281c634856 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/sized.py @@ -76,7 +76,7 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: if check_json_str(data): return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii"))) - raise TypeError(f"Invalid type: {data}. Expected a string.") + raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover def check_value(self, data: object) -> bool: return isinstance(data, np.bytes_ | str | bytes) @@ -162,7 +162,7 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: if check_json_str(data): return self.to_dtype().type(base64.standard_b64decode(data)) - raise DataTypeValidationError(f"Invalid type: {data}. Expected a string.") + raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover def check_value(self, data: object) -> bool: return isinstance(data, np.bytes_ | str | bytes | np.void) @@ -234,9 +234,9 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: return str(data) def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. Expected a string.") - return self.to_dtype().type(data) + if check_json_str(data): + return self.to_dtype().type(data) + raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover def check_value(self, data: object) -> bool: return isinstance(data, str | np.str_ | bytes) @@ -332,6 +332,7 @@ def check_json( def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: from zarr.core.dtype import get_data_type_from_json + # This is a horrible mess, because this data type is recursive if cls.check_json(data, zarr_format=zarr_format): if zarr_format == 2: # structured dtypes are constructed directly from a list of lists @@ -352,9 +353,13 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: ) return cls(fields=fields) else: - raise TypeError(f"Invalid type: {data}. Expected a dictionary.") + raise TypeError( + f"Invalid type: {data}. Expected a dictionary." + ) # pragma: no cover else: - raise TypeError(f"Invalid type: {data}. Expected a dictionary.") + raise TypeError( + f"Invalid type: {data}. Expected a dictionary." + ) # pragma: no cover raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") @@ -368,16 +373,12 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: return bytes_to_json(self.cast_value(data).tobytes(), zarr_format) def check_value(self, data: object) -> bool: - # not sure which values we should accept for structured dtypes. 
- try: - np.array([data], dtype=self.to_dtype()) - return True # noqa: TRY300 - except ValueError: - return False + # TODO: implement something here! + return True def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. Expected a string.") - as_bytes = bytes_from_json(data, zarr_format=zarr_format) - dtype = self.to_dtype() - return cast("np.void", np.array([as_bytes], dtype=dtype.str).view(dtype)[0]) + if check_json_str(data): + as_bytes = bytes_from_json(data, zarr_format=zarr_format) + dtype = self.to_dtype() + return cast("np.void", np.array([as_bytes]).view(dtype)[0]) + raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index b8fc85b297..bbdd41d13f 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -33,7 +33,7 @@ _DTypeName = Literal["datetime64", "timedelta64"] -def datetime_from_int(data: int, *, unit: DateTimeUnit, interval: int) -> np.datetime64: +def datetime_from_int(data: int, *, unit: DateTimeUnit, scale_factor: int) -> np.datetime64: """ Convert an integer to a datetime64. @@ -43,15 +43,15 @@ def datetime_from_int(data: int, *, unit: DateTimeUnit, interval: int) -> np.dat The integer to convert. unit : DateTimeUnit The unit of the datetime64. - interval : int - The interval of the datetime64. + scale_factor : int + The scale factor of the datetime64. Returns ------- np.datetime64 The datetime64 value. 
""" - dtype_name = f"datetime64[{interval}{unit}]" + dtype_name = f"datetime64[{scale_factor}{unit}]" return cast("np.datetime64", np.int64(data).view(dtype_name)) @@ -184,9 +184,9 @@ def default_value(self) -> np.timedelta64: return np.timedelta64("NaT") def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: - if check_json_int(data): + if check_json_int(data) or data == "NaT": return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") - raise TypeError(f"Invalid type: {data}. Expected an integer.") + raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover def _cast_value_unsafe(self, value: object) -> np.timedelta64: return self.to_dtype().type(value) # type: ignore[arg-type] @@ -231,9 +231,9 @@ def default_value(self) -> np.datetime64: return np.datetime64("NaT") def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: - if check_json_int(data): + if check_json_int(data) or data == "NaT": return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") - raise TypeError(f"Invalid type: {data}. Expected an integer.") + raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover def _cast_value_unsafe(self, value: object) -> np.datetime64: return self.to_dtype().type(value) # type: ignore[no-any-return, call-overload] diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 0600fab80b..199cbda5d8 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -159,7 +159,13 @@ def cast_value(self, data: object) -> TScalar_co: """ if self.check_value(data): return self._cast_value_unsafe(data) - raise TypeError(f"Invalid value: {data}") + msg = ( + f"The value {data} failed a type check." + f"It cannot be safely cast to a scalar compatible with {self.dtype_cls}." + f"Consult the documentation for {self} to determine the possible values that can" + "be cast to scalars of the wrapped data type." 
+ ) + raise TypeError(msg) @abstractmethod def check_value(self, data: object) -> bool: diff --git a/tests/test_dtype/test_dtype.py b/tests/test_dtype/test_dtype.py index 566a04b5fb..e69de29bb2 100644 --- a/tests/test_dtype/test_dtype.py +++ b/tests/test_dtype/test_dtype.py @@ -1,248 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, get_args - -from zarr.core.dtype import ( - DTYPE, - Bool, - Complex64, - Complex128, - DateTime64, - FixedLengthAscii, - FixedLengthBytes, - FixedLengthUnicode, - Float16, - Float32, - Float64, - Int8, - Int16, - Int32, - Int64, - Structured, - UInt8, - UInt16, - UInt32, - UInt64, - VariableLengthString, - ZDType, -) - -from .conftest import zdtype_examples - -if TYPE_CHECKING: - from zarr.core.common import ZarrFormat - from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar - -import numpy as np -import pytest - -from zarr.core.dtype.common import DataTypeValidationError - -_NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") -VLEN_STRING_DTYPE: np.dtypes.StringDType | np.dtypes.ObjectDType -if _NUMPY_SUPPORTS_VLEN_STRING: - VLEN_STRING_DTYPE = np.dtypes.StringDType() - VLEN_STRING_CODE = "T" -else: - VLEN_STRING_DTYPE = np.dtypes.ObjectDType() - VLEN_STRING_CODE = "O" - - -def test_zdtype_examples() -> None: - """ - Test that all the elements of the exported union type DTYPE have an example in the variable - zdtype_examples, which we use for testing. - - If this test fails, that means that either there is a data type that does not have an example, - or there is a data type that is missing from the DTYPE union type. 
- """ - assert set(map(type, zdtype_examples)) == set(get_args(DTYPE)) - - -@pytest.mark.parametrize( - ("wrapper_cls", "np_dtype"), - [ - (Bool, "bool"), - (Int8, "int8"), - (Int16, "int16"), - (Int32, "int32"), - (Int64, "int64"), - (UInt8, "uint8"), - (UInt16, "uint16"), - (UInt32, "uint32"), - (UInt64, "uint64"), - (Float32, "float32"), - (Float64, "float64"), - (Complex64, "complex64"), - (Complex128, "complex128"), - (FixedLengthUnicode, "U"), - (FixedLengthAscii, "S"), - (FixedLengthBytes, "V"), - (VariableLengthString, VLEN_STRING_CODE), - (Structured, np.dtype([("a", np.float64), ("b", np.int8)])), - (DateTime64, "datetime64[s]"), - ], -) -def test_wrap(wrapper_cls: type[ZDType[Any, Any]], np_dtype: np.dtype[np.generic] | str) -> None: - """ - Test that the wrapper class has the correct dtype class bound to the dtype_cls variable - Test that the ``wrap`` method produces an instance of the wrapper class - Test that the ``unwrap`` method returns the original dtype - """ - dt = np.dtype(np_dtype) - assert wrapper_cls.dtype_cls is type(dt) - wrapped = wrapper_cls.from_dtype(dt) - - with pytest.raises(DataTypeValidationError, match="Invalid dtype"): - wrapper_cls.from_dtype("not a dtype") # type: ignore[arg-type] - assert isinstance(wrapped, wrapper_cls) - assert wrapped.to_dtype() == dt - - -@pytest.mark.parametrize("zdtype", zdtype_examples) -def test_to_json_roundtrip(zdtype: ZDType[Any, Any], zarr_format: ZarrFormat) -> None: - """ - Test that a zdtype instance can round-trip through its JSON form - """ - as_dict = zdtype.to_json(zarr_format=zarr_format) - assert zdtype.from_json(as_dict, zarr_format=zarr_format) == zdtype - - -@pytest.mark.parametrize( - ("wrapper", "expected_default"), - [ - (Bool(), np.False_), - (Int8(), np.int8(0)), - (UInt8(), np.uint8(0)), - (Int16(), np.int16(0)), - (UInt16(), np.uint16(0)), - (Int32(), np.int32(0)), - (UInt32(), np.uint32(0)), - (Int64(), np.int64(0)), - (UInt64(), np.uint64(0)), - (Float16(), np.float16(0)), - 
(Float32(), np.float32(0)), - (Float64(), np.float64(0)), - (Complex64(), np.complex64(0)), - (Complex128(), np.complex128(0)), - (FixedLengthAscii(length=3), np.bytes_(b"")), - (FixedLengthBytes(length=3), np.void(b"\x00\x00\x00")), - (FixedLengthUnicode(length=3), np.str_("")), - ( - Structured(fields=(("a", Float64()), ("b", Int8()))), - np.array([0], dtype=[("a", np.float64), ("b", np.int8)])[0], - ), - (VariableLengthString(), ""), - (DateTime64(unit="s"), np.datetime64("NaT")), - ], -) -def test_default_value(wrapper: ZDType[Any, Any], expected_default: Any) -> None: - """ - Test that the default_value method is correctly set for each dtype wrapper. - """ - if isinstance(wrapper, DateTime64): - assert np.isnan(wrapper.default_value()) - else: - assert wrapper.default_value() == expected_default - - -@pytest.mark.parametrize( - ("wrapper", "input_value", "expected_json"), - [ - (Bool(), np.bool_(True), True), - (Int8(), np.int8(42), 42), - (UInt8(), np.uint8(42), 42), - (Int16(), np.int16(42), 42), - (UInt16(), np.uint16(42), 42), - (Int32(), np.int32(42), 42), - (UInt32(), np.uint32(42), 42), - (Int64(), np.int64(42), 42), - (UInt64(), np.uint64(42), 42), - (Float16(), np.float16(42.0), 42.0), - (Float32(), np.float32(42.0), 42.0), - (Float64(), np.float64(42.0), 42.0), - (Complex64(), np.complex64(42.0 + 1.0j), (42.0, 1.0)), - (Complex128(), np.complex128(42.0 + 1.0j), (42.0, 1.0)), - (FixedLengthAscii(length=4), np.bytes_(b"test"), "dGVzdA=="), - (FixedLengthBytes(length=4), np.void(b"test"), "dGVzdA=="), - (FixedLengthUnicode(length=4), np.str_("test"), "test"), - (VariableLengthString(), "test", "test"), - (DateTime64(unit="s"), np.datetime64("2021-01-01T00:00:00", "s"), 1609459200), - ], -) -def test_to_json_value_v2( - wrapper: ZDType[TBaseDType, TBaseScalar], input_value: Any, expected_json: Any -) -> None: - """ - Test the to_json_value method for each dtype wrapper for zarr v2 - """ - assert wrapper.to_json_value(input_value, zarr_format=2) == 
expected_json - - -# NOTE! This test is currently a direct copy of the v2 version. When or if we change JSON serialization -# in a v3-specific manner, this test must be changed. -# TODO: Apply zarr-v3-specific changes to this test as needed -@pytest.mark.parametrize( - ("wrapper", "input_value", "expected_json"), - [ - (Bool(), np.bool_(True), True), - (Int8(), np.int8(42), 42), - (UInt8(), np.uint8(42), 42), - (Int16(), np.int16(42), 42), - (UInt16(), np.uint16(42), 42), - (Int32(), np.int32(42), 42), - (UInt32(), np.uint32(42), 42), - (Int64(), np.int64(42), 42), - (UInt64(), np.uint64(42), 42), - (Float16(), np.float16(42.0), 42.0), - (Float32(), np.float32(42.0), 42.0), - (Float64(), np.float64(42.0), 42.0), - (Complex64(), np.complex64(42.0 + 1.0j), (42.0, 1.0)), - (Complex128(), np.complex128(42.0 + 1.0j), (42.0, 1.0)), - (FixedLengthAscii(length=4), np.bytes_(b"test"), "dGVzdA=="), - (FixedLengthBytes(length=4), np.void(b"test"), "dGVzdA=="), - (FixedLengthUnicode(length=4), np.str_("test"), "test"), - (VariableLengthString(), "test", "test"), - (DateTime64(unit="s"), np.datetime64("2021-01-01T00:00:00", "s"), 1609459200), - ], -) -def test_to_json_value_v3( - wrapper: ZDType[TBaseDType, TBaseScalar], input_value: Any, expected_json: Any -) -> None: - """ - Test the to_json_value method for each dtype wrapper for zarr v3 - """ - assert wrapper.to_json_value(input_value, zarr_format=3) == expected_json - - -@pytest.mark.parametrize( - ("wrapper", "json_value", "expected_value"), - [ - (Bool(), True, np.bool_(True)), - (Int8(), 42, np.int8(42)), - (UInt8(), 42, np.uint8(42)), - (Int16(), 42, np.int16(42)), - (UInt16(), 42, np.uint16(42)), - (Int32(), 42, np.int32(42)), - (UInt32(), 42, np.uint32(42)), - (Int64(), 42, np.int64(42)), - (UInt64(), 42, np.uint64(42)), - (Float16(), 42.0, np.float16(42.0)), - (Float32(), 42.0, np.float32(42.0)), - (Float64(), 42.0, np.float64(42.0)), - (Complex64(), (42.0, 1.0), np.complex64(42.0 + 1.0j)), - (Complex128(), (42.0, 
1.0), np.complex128(42.0 + 1.0j)), - (FixedLengthAscii(length=4), "dGVzdA==", np.bytes_(b"test")), - (FixedLengthBytes(length=4), "dGVzdA==", np.void(b"test")), - (FixedLengthUnicode(length=4), "test", np.str_("test")), - (VariableLengthString(), "test", "test"), - (DateTime64(unit="s"), 1609459200, np.datetime64("2021-01-01T00:00:00", "s")), - ], -) -def test_from_json_value( - wrapper: ZDType[TBaseDType, TBaseScalar], json_value: Any, expected_value: Any -) -> None: - """ - Test the from_json_value method for each dtype wrapper. - """ - assert wrapper.from_json_value(json_value, zarr_format=2) == expected_value diff --git a/tests/test_dtype/test_npy/test_bool.py b/tests/test_dtype/test_npy/test_bool.py index 1040683846..086a2cfee8 100644 --- a/tests/test_dtype/test_npy/test_bool.py +++ b/tests/test_dtype/test_npy/test_bool.py @@ -8,6 +8,7 @@ class TestBool(_TestZDType): test_cls = Bool + valid_dtype = (np.dtype(np.bool_),) invalid_dtype = ( np.dtype(np.int8), @@ -27,5 +28,13 @@ class TestBool(_TestZDType): {"name": "bool", "configuration": {"endianness": "little"}}, ) - scalar_v2_params = (("|b1", True), ("|b1", False)) - scalar_v3_params = (("bool", True), ("bool", False)) + scalar_v2_params = ((Bool(), True), (Bool(), False)) + scalar_v3_params = ((Bool(), True), (Bool(), False)) + + cast_value_params = ( + (Bool(), "true", np.True_), + (Bool(), True, np.True_), + (Bool(), False, np.False_), + (Bool(), np.True_, np.True_), + (Bool(), np.False_, np.False_), + ) diff --git a/tests/test_dtype/test_npy/test_complex.py b/tests/test_dtype/test_npy/test_complex.py index aac514028d..b24bc4d7c8 100644 --- a/tests/test_dtype/test_npy/test_complex.py +++ b/tests/test_dtype/test_npy/test_complex.py @@ -1,12 +1,21 @@ from __future__ import annotations +import math + import numpy as np from tests.test_dtype.test_wrapper import _TestZDType from zarr.core.dtype.npy.complex import Complex64, Complex128 -class TestComplex64(_TestZDType): +class _BaseTestFloat(_TestZDType): + def 
scalar_equals(self, scalar1: object, scalar2: object) -> bool: + if np.isnan(scalar1) and np.isnan(scalar2): # type: ignore[call-overload] + return True + return super().scalar_equals(scalar1, scalar2) + + +class TestComplex64(_BaseTestFloat): test_cls = Complex64 valid_dtype = (np.dtype(">c8"), np.dtype("c8", ">c8") + valid_json_v2 = (">c8", "c8", (1.0, 1.0)), ("c8", (0, "NaN"))) + scalar_v2_params = ( + (Complex64(), (1.0, 1.0)), + (Complex64(), (-1.0, "Infinity")), + (Complex64(), (0, "NaN")), + ) scalar_v3_params = ( - ("complex64", (1.0, 1.0)), - ("complex64", (-1.0, "Infinity")), - ("complex64", (0, "NaN")), + (Complex64(), (1.0, 1.0)), + (Complex64(), (-1.0, "Infinity")), + (Complex64(), (0, "NaN")), + ) + cast_value_params = ( + (Complex64(), complex(1.0, 1.0), np.complex64(complex(1.0, 1.0))), + (Complex64(), complex(-1.0, math.inf), np.complex64(complex(-1.0, math.inf))), + (Complex64(), complex(0, math.nan), np.complex64(complex(0, math.nan))), ) -class TestComplex128(_TestZDType): +class TestComplex128(_BaseTestFloat): test_cls = Complex128 valid_dtype = (np.dtype(">c16"), np.dtype("c16", (1.0, 1.0)), ("c16", (0, "NaN"))) + scalar_v2_params = ( + (Complex128(), (1.0, 1.0)), + (Complex128(), (-1.0, "Infinity")), + (Complex128(), (0, "NaN")), + ) scalar_v3_params = ( - ("complex128", (1.0, 1.0)), - ("complex128", (-1.0, "Infinity")), - ("complex128", (0, "NaN")), + (Complex128(), (1.0, 1.0)), + (Complex128(), (-1.0, "Infinity")), + (Complex128(), (0, "NaN")), + ) + cast_value_params = ( + (Complex128(), complex(1.0, 1.0), np.complex128(complex(1.0, 1.0))), + (Complex128(), complex(-1.0, math.inf), np.complex128(complex(-1.0, math.inf))), + (Complex128(), complex(0, math.nan), np.complex128(complex(0, math.nan))), ) diff --git a/tests/test_dtype/test_npy/test_float.py b/tests/test_dtype/test_npy/test_float.py index 232ed1e32c..5981d09514 100644 --- a/tests/test_dtype/test_npy/test_float.py +++ b/tests/test_dtype/test_npy/test_float.py @@ -6,7 +6,14 @@ from 
zarr.core.dtype.npy.float import Float16, Float32, Float64 -class TestFloat16(_TestZDType): +class _BaseTestFloat(_TestZDType): + def scalar_equals(self, scalar1: object, scalar2: object) -> bool: + if np.isnan(scalar1) and np.isnan(scalar2): # type: ignore[call-overload] + return True + return super().scalar_equals(scalar1, scalar2) + + +class TestFloat16(_BaseTestFloat): test_cls = Float16 valid_dtype = (np.dtype(">f2"), np.dtype("f2", 1.0), ("f2", "Infinity")) + scalar_v2_params = ( + (Float16(), 1.0), + (Float16(), -1.0), + (Float16(), "NaN"), + (Float16(), "Infinity"), + ) scalar_v3_params = ( - ("float16", 1.0), - ("float16", -1.0), - ("float16", "NaN"), - ("float16", "Infinity"), + (Float16(), 1.0), + (Float16(), -1.0), + (Float16(), "NaN"), + (Float16(), "Infinity"), + ) + cast_value_params = ( + (Float16(), 1.0, np.float16(1.0)), + (Float16(), -1.0, np.float16(-1.0)), + (Float16(), "NaN", np.float16("NaN")), ) -class TestFloat32(_TestZDType): +class TestFloat32(_BaseTestFloat): test_cls = Float32 + scalar_type = np.float32 valid_dtype = (np.dtype(">f4"), np.dtype("f4", 1.0), ("f4", "Infinity")) + scalar_v2_params = ( + (Float32(), 1.0), + (Float32(), -1.0), + (Float32(), "NaN"), + (Float32(), "Infinity"), + ) scalar_v3_params = ( - ("float32", 1.0), - ("float32", -1.0), - ("float32", "NaN"), - ("float32", "Infinity"), + (Float32(), 1.0), + (Float32(), -1.0), + (Float32(), "NaN"), + (Float32(), "Infinity"), + ) + + cast_value_params = ( + (Float32(), 1.0, np.float32(1.0)), + (Float32(), -1.0, np.float32(-1.0)), + (Float32(), "NaN", np.float32("NaN")), ) -class TestFloat64(_TestZDType): +class TestFloat64(_BaseTestFloat): test_cls = Float64 valid_dtype = (np.dtype(">f8"), np.dtype("f8", 1.0), ("f8", "Infinity")) + scalar_v2_params = ( + (Float64(), 1.0), + (Float64(), -1.0), + (Float64(), "NaN"), + (Float64(), "Infinity"), + ) scalar_v3_params = ( - ("float64", 1.0), - ("float64", -1.0), - ("float64", "NaN"), - ("float64", "Infinity"), + (Float64(), 1.0), + 
(Float64(), -1.0), + (Float64(), "NaN"), + (Float64(), "Infinity"), + ) + + cast_value_params = ( + (Float64(), 1.0, np.float64(1.0)), + (Float64(), -1.0, np.float64(-1.0)), + (Float64(), "NaN", np.float64("NaN")), ) diff --git a/tests/test_dtype/test_npy/test_int.py b/tests/test_dtype/test_npy/test_int.py index 99f698fc8e..637b594e1b 100644 --- a/tests/test_dtype/test_npy/test_int.py +++ b/tests/test_dtype/test_npy/test_int.py @@ -8,6 +8,7 @@ class TestInt8(_TestZDType): test_cls = Int8 + scalar_type = np.int8 valid_dtype = (np.dtype(np.int8),) invalid_dtype = ( np.dtype(np.int16), @@ -27,12 +28,17 @@ class TestInt8(_TestZDType): {"name": "int8", "configuration": {"endianness": "little"}}, ) - scalar_v2_params = (("|i1", 1), ("|i1", -1)) - scalar_v3_params = (("int8", 1), ("int8", -1)) + scalar_v2_params = ((Int8(), 1), (Int8(), -1)) + scalar_v3_params = ((Int8(), 1), (Int8(), -1)) + cast_value_params = ( + (Int8(), 1, np.int8(1)), + (Int8(), -1, np.int8(-1)), + ) class TestInt16(_TestZDType): test_cls = Int16 + scalar_type = np.int16 valid_dtype = (np.dtype(">i2"), np.dtype("i2", -1)) - scalar_v3_params = (("int16", 1), ("int16", -1)) + scalar_v2_params = ((Int16(), 1), (Int16(), -1)) + scalar_v3_params = ((Int16(), 1), (Int16(), -1)) + cast_value_params = ( + (Int16(), 1, np.int16(1)), + (Int16(), -1, np.int16(-1)), + ) class TestInt32(_TestZDType): test_cls = Int32 + scalar_type = np.int32 valid_dtype = (np.dtype(">i4"), np.dtype("i4", -1)) - scalar_v3_params = (("int32", 1), ("int32", -1)) + scalar_v2_params = ((Int32(), 1), (Int32(), -1)) + scalar_v3_params = ((Int32(), 1), (Int32(), -1)) + cast_value_params = ( + (Int32(), 1, np.int32(1)), + (Int32(), -1, np.int32(-1)), + ) class TestInt64(_TestZDType): test_cls = Int64 + scalar_type = np.int64 valid_dtype = (np.dtype(">i8"), np.dtype("i8", -1)) - scalar_v3_params = (("int64", 1), ("int64", -1)) + scalar_v2_params = ((Int64(), 1), (Int64(), -1)) + scalar_v3_params = ((Int64(), 1), (Int64(), -1)) + 
cast_value_params = ( + (Int64(), 1, np.int64(1)), + (Int64(), -1, np.int64(-1)), + ) class TestUInt8(_TestZDType): test_cls = UInt8 + scalar_type = np.uint8 valid_dtype = (np.dtype(np.uint8),) invalid_dtype = ( np.dtype(np.int8), @@ -127,12 +148,17 @@ class TestUInt8(_TestZDType): {"name": "uint8", "configuration": {"endianness": "little"}}, ) - scalar_v2_params = (("|u1", 1), ("|u1", 0)) - scalar_v3_params = (("uint8", 1), ("uint8", 0)) + scalar_v2_params = ((UInt8(), 1), (UInt8(), 0)) + scalar_v3_params = ((UInt8(), 1), (UInt8(), 0)) + cast_value_params = ( + (UInt8(), 1, np.uint8(1)), + (UInt8(), 0, np.uint8(0)), + ) class TestUInt16(_TestZDType): test_cls = UInt16 + scalar_type = np.uint16 valid_dtype = (np.dtype(">u2"), np.dtype("u2", 0)) - scalar_v3_params = (("uint16", 1), ("uint16", 0)) + scalar_v2_params = ((UInt16(), 1), (UInt16(), 0)) + scalar_v3_params = ((UInt16(), 1), (UInt16(), 0)) + cast_value_params = ( + (UInt16(), 1, np.uint16(1)), + (UInt16(), 0, np.uint16(0)), + ) class TestUInt32(_TestZDType): test_cls = UInt32 + scalar_type = np.uint32 valid_dtype = (np.dtype(">u4"), np.dtype("u4", 0)) - scalar_v3_params = (("uint32", 1), ("uint32", 0)) + scalar_v2_params = ((UInt32(), 1), (UInt32(), 0)) + scalar_v3_params = ((UInt32(), 1), (UInt32(), 0)) + cast_value_params = ( + (UInt32(), 1, np.uint32(1)), + (UInt32(), 0, np.uint32(0)), + ) class TestUInt64(_TestZDType): test_cls = UInt64 + scalar_type = np.uint64 valid_dtype = (np.dtype(">u8"), np.dtype("u8", 0)) - scalar_v3_params = (("uint64", 1), ("uint64", 0)) + scalar_v2_params = ((UInt64(), 1), (UInt64(), 0)) + scalar_v3_params = ((UInt64(), 1), (UInt64(), 0)) + cast_value_params = ( + (UInt64(), 1, np.uint64(1)), + (UInt64(), 0, np.uint64(0)), + ) diff --git a/tests/test_dtype/test_npy/test_sized.py b/tests/test_dtype/test_npy/test_sized.py index 17f4b2af2d..2ded5bbb7c 100644 --- a/tests/test_dtype/test_npy/test_sized.py +++ b/tests/test_dtype/test_npy/test_sized.py @@ -1,8 +1,12 @@ from 
__future__ import annotations +from typing import Any + import numpy as np from tests.test_dtype.test_wrapper import _TestZDType +from zarr.core.dtype.npy.float import Float16, Float64 +from zarr.core.dtype.npy.int import Int32, Int64 from zarr.core.dtype.npy.sized import ( FixedLengthAscii, FixedLengthBytes, @@ -31,11 +35,20 @@ class TestFixedLengthAscii(_TestZDType): {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": "invalid"}}, ) - scalar_v2_params = (("|S0", ""), ("|S2", "YWI="), ("|S4", "YWJjZA==")) + scalar_v2_params = ( + (FixedLengthAscii(length=0), ""), + (FixedLengthAscii(length=2), "YWI="), + (FixedLengthAscii(length=4), "YWJjZA=="), + ) scalar_v3_params = ( - ({"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": 0}}, ""), - ({"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": 16}}, "YWI="), - ({"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": 32}}, "YWJjZA=="), + (FixedLengthAscii(length=0), ""), + (FixedLengthAscii(length=2), "YWI="), + (FixedLengthAscii(length=4), "YWJjZA=="), + ) + cast_value_params = ( + (FixedLengthAscii(length=0), "", np.bytes_("")), + (FixedLengthAscii(length=2), "ab", np.bytes_("ab")), + (FixedLengthAscii(length=4), "abcd", np.bytes_("abcd")), ) @@ -63,11 +76,20 @@ class TestFixedLengthBytes(_TestZDType): {"name": "r-80"}, ) - scalar_v2_params = (("|V0", ""), ("|V2", "YWI="), ("|V4", "YWJjZA==")) + scalar_v2_params = ( + (FixedLengthBytes(length=0), ""), + (FixedLengthBytes(length=2), "YWI="), + (FixedLengthBytes(length=4), "YWJjZA=="), + ) scalar_v3_params = ( - ({"name": "numpy.fixed_length_bytes", "configuration": {"length_bytes": 2}}, ""), - ({"name": "numpy.fixed_length_bytes", "configuration": {"length_bytes": 2}}, "YWI="), - ({"name": "numpy.fixed_length_bytes", "configuration": {"length_bytes": 4}}, "YWJjZA=="), + (FixedLengthBytes(length=0), ""), + (FixedLengthBytes(length=2), "YWI="), + (FixedLengthBytes(length=4), "YWJjZA=="), + ) + 
cast_value_params = ( + (FixedLengthBytes(length=0), b"", np.void(b"")), + (FixedLengthBytes(length=2), b"ab", np.void(b"ab")), + (FixedLengthBytes(length=4), b"abcd", np.void(b"abcd")), ) @@ -91,11 +113,17 @@ class TestFixedLengthUnicode(_TestZDType): {"name": "numpy.fixed_length_ucs4", "configuration": {"length_bits": "invalid"}}, ) - scalar_v2_params = ((">U0", ""), (" bool: + if hasattr(scalar1, "shape") and hasattr(scalar2, "shape"): + return np.array_equal(scalar1, scalar2) + return super().scalar_equals(scalar1, scalar2) diff --git a/tests/test_dtype/test_npy/test_string.py b/tests/test_dtype/test_npy/test_string.py index fbb0aaa86d..c87f538be5 100644 --- a/tests/test_dtype/test_npy/test_string.py +++ b/tests/test_dtype/test_npy/test_string.py @@ -8,8 +8,8 @@ if _NUMPY_SUPPORTS_VLEN_STRING: class TestVariableLengthString(_TestZDType): - test_cls = VariableLengthString - valid_dtype = (np.dtypes.StringDType(),) + test_cls = VariableLengthString # type: ignore[assignment] + valid_dtype = (np.dtypes.StringDType(),) # type: ignore[assignment] invalid_dtype = ( np.dtype(np.int8), np.dtype(np.float64), @@ -27,10 +27,21 @@ class TestVariableLengthString(_TestZDType): {"name": "invalid_name"}, ) + scalar_v2_params = ((VariableLengthString(), ""), (VariableLengthString(), "hi")) + scalar_v3_params = ( + (VariableLengthString(), ""), + (VariableLengthString(), "hi"), + ) + + cast_value_params = ( + (VariableLengthString(), "", np.str_("")), + (VariableLengthString(), "hi", np.str_("hi")), + ) + else: - class TestVariableLengthString(_TestZDType): - test_cls = VariableLengthString + class TestVariableLengthString(_TestZDType): # type: ignore[no-redef] + test_cls = VariableLengthString # type: ignore[assignment] valid_dtype = (np.dtype("O"),) invalid_dtype = ( np.dtype(np.int8), @@ -48,3 +59,14 @@ class TestVariableLengthString(_TestZDType): {"name": "numpy.variable_length_utf8", "configuration": {"invalid_key": "value"}}, {"name": "invalid_name"}, ) + + 
scalar_v2_params = ((VariableLengthString(), ""), (VariableLengthString(), "hi")) + scalar_v3_params = ( + (VariableLengthString(), ""), + (VariableLengthString(), "hi"), + ) + + cast_value_params = ( + (VariableLengthString(), "", np.str_("")), + (VariableLengthString(), "hi", np.str_("hi")), + ) diff --git a/tests/test_dtype/test_npy/test_time.py b/tests/test_dtype/test_npy/test_time.py index 2a8ff6ac98..f8f8b5ae47 100644 --- a/tests/test_dtype/test_npy/test_time.py +++ b/tests/test_dtype/test_npy/test_time.py @@ -1,15 +1,32 @@ from __future__ import annotations import re +from typing import get_args import numpy as np import pytest from tests.test_dtype.test_wrapper import _TestZDType -from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 +from zarr.core.dtype.npy.common import DateTimeUnit +from zarr.core.dtype.npy.time import DateTime64, TimeDelta64, datetime_from_int -class TestDateTime64(_TestZDType): +class _TestTimeBase(_TestZDType): + def json_scalar_equals(self, scalar1: object, scalar2: object) -> bool: + # This method gets overridden here to support the equivalency between NaT and + # -9223372036854775808 fill values + nat_scalars = (-9223372036854775808, "NaT") + if scalar1 in nat_scalars and scalar2 in nat_scalars: + return True + return scalar1 == scalar2 + + def scalar_equals(self, scalar1: object, scalar2: object) -> bool: + if np.isnan(scalar1) and np.isnan(scalar2): # type: ignore[call-overload] + return True + return super().scalar_equals(scalar1, scalar2) + + +class TestDateTime64(_TestTimeBase): test_cls = DateTime64 valid_dtype = (np.dtype("datetime64[10ns]"), np.dtype("datetime64[us]"), np.dtype("datetime64")) invalid_dtype = ( @@ -32,8 +49,23 @@ class TestDateTime64(_TestZDType): {"name": "datetime64", "configuration": {"unit": 123}}, ) + scalar_v2_params = ( + (DateTime64(unit="ns", scale_factor=1), 1), + (DateTime64(unit="ns", scale_factor=1), "NaT"), + ) + scalar_v3_params = ( + (DateTime64(unit="ns", scale_factor=1), 1), + 
(DateTime64(unit="ns", scale_factor=1), "NaT"), + ) -class TestTimeDelta64(_TestZDType): + cast_value_params = ( + (DateTime64(unit="Y", scale_factor=1), "1", np.datetime64("1", "Y")), + (DateTime64(unit="s", scale_factor=1), "2005-02-25", np.datetime64("2005-02-25", "s")), + (DateTime64(unit="ns", scale_factor=1), "NaT", np.datetime64("NaT")), + ) + + +class TestTimeDelta64(_TestTimeBase): test_cls = TimeDelta64 valid_dtype = (np.dtype("timedelta64[ns]"), np.dtype("timedelta64[us]")) invalid_dtype = ( @@ -57,6 +89,20 @@ class TestTimeDelta64(_TestZDType): {"name": "timedelta64", "configuration": {"unit": 123}}, ) + scalar_v2_params = ( + (TimeDelta64(unit="ns", scale_factor=1), 1), + (TimeDelta64(unit="ns", scale_factor=1), "NaT"), + ) + scalar_v3_params = ( + (TimeDelta64(unit="ns", scale_factor=1), 1), + (TimeDelta64(unit="ns", scale_factor=1), "NaT"), + ) + + cast_value_params = ( + (TimeDelta64(unit="ns", scale_factor=1), "1", np.timedelta64(1, "ns")), + (TimeDelta64(unit="ns", scale_factor=1), "NaT", np.timedelta64("NaT")), + ) + def test_time_invalid_unit() -> None: """ @@ -92,3 +138,14 @@ def test_time_scale_factor_too_high() -> None: DateTime64(scale_factor=scale_factor) with pytest.raises(ValueError, match=msg): TimeDelta64(scale_factor=scale_factor) + + +@pytest.mark.parametrize("unit", get_args(DateTimeUnit)) +@pytest.mark.parametrize("scale_factor", [1, 10]) +@pytest.mark.parametrize("value", [0, 1, 10]) +def test_datetime_from_int(unit: DateTimeUnit, scale_factor: int, value: int) -> None: + """ + Test datetime_from_int. 
+ """ + expected = np.int64(value).view(f"datetime64[{scale_factor}{unit}]") + assert datetime_from_int(value, unit=unit, scale_factor=scale_factor) == expected diff --git a/tests/test_dtype/test_wrapper.py b/tests/test_dtype/test_wrapper.py index defd3fffc5..ddf43524e0 100644 --- a/tests/test_dtype/test_wrapper.py +++ b/tests/test_dtype/test_wrapper.py @@ -5,10 +5,8 @@ if TYPE_CHECKING: from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType -import pytest -import requests - +""" class _TestZDTypeSchema: # subclasses define the URL for the schema, if available schema_url: ClassVar[str] = "" @@ -21,11 +19,12 @@ def get_schema(self) -> object: def test_schema(self, schema: json_schema.Schema) -> None: assert schema.is_valid(self.test_cls.to_json(zarr_format=2)) +""" class _TestZDType: test_cls: type[ZDType[TBaseDType, TBaseScalar]] - + scalar_type: ClassVar[type[TBaseScalar]] valid_dtype: ClassVar[tuple[TBaseDType, ...]] = () invalid_dtype: ClassVar[tuple[TBaseDType, ...]] = () @@ -42,6 +41,18 @@ class _TestZDType: scalar_v2_params: ClassVar[tuple[tuple[Any, Any], ...]] = () scalar_v3_params: ClassVar[tuple[tuple[Any, Any], ...]] = () + cast_value_params: ClassVar[tuple[tuple[Any, Any, Any], ...]] + + def json_scalar_equals(self, scalar1: object, scalar2: object) -> bool: + # An equality check for json-encoded scalars. This defaults to regular equality, + # but some classes may need to override this for special cases + return scalar1 == scalar2 + + def scalar_equals(self, scalar1: object, scalar2: object) -> bool: + # An equality check for scalars. 
This defaults to regular equality, + # but some classes may need to override this for special cases + return scalar1 == scalar2 + def test_check_dtype_valid(self, valid_dtype: object) -> None: assert self.test_cls.check_dtype(valid_dtype) # type: ignore[arg-type] @@ -60,14 +71,17 @@ def test_from_json_roundtrip_v3(self, valid_json_v3: Any) -> None: zdtype = self.test_cls.from_json(valid_json_v3, zarr_format=3) assert zdtype.to_json(zarr_format=3) == valid_json_v3 - def test_scalar_roundtrip_v2(self, scalar_v2_params: Any) -> None: - dtype_json, scalar_json = scalar_v2_params - zdtype = self.test_cls.from_json(dtype_json, zarr_format=2) + def test_scalar_roundtrip_v2(self, scalar_v2_params: tuple[Any, Any]) -> None: + zdtype, scalar_json = scalar_v2_params scalar = zdtype.from_json_value(scalar_json, zarr_format=2) - assert scalar_json == zdtype.to_json_value(scalar, zarr_format=2) + assert self.json_scalar_equals(scalar_json, zdtype.to_json_value(scalar, zarr_format=2)) - def test_scalar_roundtrip_v3(self, scalar_v3_params: Any) -> None: - dtype_json, scalar_json = scalar_v3_params - zdtype = self.test_cls.from_json(dtype_json, zarr_format=3) + def test_scalar_roundtrip_v3(self, scalar_v3_params: tuple[Any, Any]) -> None: + zdtype, scalar_json = scalar_v3_params scalar = zdtype.from_json_value(scalar_json, zarr_format=3) - assert scalar_json == zdtype.to_json_value(scalar, zarr_format=3) + assert self.json_scalar_equals(scalar_json, zdtype.to_json_value(scalar, zarr_format=3)) + + def test_cast_value(self, cast_value_params: tuple[Any, Any, Any]) -> None: + zdtype, value, expected = cast_value_params + observed = zdtype.cast_value(value) + assert self.scalar_equals(expected, observed) diff --git a/tests/test_properties.py b/tests/test_properties.py index 15dd701582..68427dd8fe 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -75,6 +75,7 @@ def deep_equal(a: Any, b: Any) -> bool: return a == b +@settings(deadline=300) @given(data=st.data(), 
zarr_format=zarr_formats) def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None: nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) From 4ab1c58722297a526ab79c5936c5726b97d351fa Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 13:11:53 +0200 Subject: [PATCH 080/130] use relative link for changes --- changes/2874.feature.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changes/2874.feature.rst b/changes/2874.feature.rst index 26eda3a257..d0adcd6533 100644 --- a/changes/2874.feature.rst +++ b/changes/2874.feature.rst @@ -1,2 +1,2 @@ Adds zarr-specific data type classes. This replaces the direct use of numpy data types for zarr -v2 and a fixed set of string enums for zarr v3. For more on this new feature, see the `documentation `_ \ No newline at end of file +v2 and a fixed set of string enums for zarr v3. For more on this new feature, see the `documentation documentation `_ \ No newline at end of file From e4c89f30b874e53b47f7d36454a8cd428c35f64f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 13:12:23 +0200 Subject: [PATCH 081/130] typo --- changes/2874.feature.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changes/2874.feature.rst b/changes/2874.feature.rst index d0adcd6533..50634e5395 100644 --- a/changes/2874.feature.rst +++ b/changes/2874.feature.rst @@ -1,2 +1,2 @@ Adds zarr-specific data type classes. This replaces the direct use of numpy data types for zarr -v2 and a fixed set of string enums for zarr v3. For more on this new feature, see the `documentation documentation `_ \ No newline at end of file +v2 and a fixed set of string enums for zarr v3. 
For more on this new feature, see the `documentation `_ \ No newline at end of file From e386c2bfa93b0583d2ac6a4247c43950489ccd64 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 13:24:08 +0200 Subject: [PATCH 082/130] make bytes codec dtype logic a bit more literate --- src/zarr/codecs/bytes.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 6c28bfe543..a87df060e7 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -58,7 +58,10 @@ def to_dict(self) -> dict[str, JSON]: return {"name": "bytes", "configuration": {"endian": self.endian.value}} def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: - if array_spec.dtype.to_dtype().itemsize == 1: + # Note: this check is numpy-dtype-specific + # For single-byte (e.g., uint8) or 0-byte (e.g., S0) dtypes, + # endianness does not apply. + if array_spec.dtype.to_dtype().itemsize < 2: if self.endian is not None: return replace(self, endian=None) elif self.endian is None: @@ -77,7 +80,8 @@ async def _decode_single( endian_str = cast( "Endianness | None", self.endian.value if self.endian is not None else None ) - dtype = chunk_spec.dtype.to_dtype().newbyteorder(endianness_to_numpy_str(endian_str)) + new_byte_order = endianness_to_numpy_str(endian_str) + dtype = chunk_spec.dtype.to_dtype().newbyteorder(new_byte_order) as_array_like = chunk_bytes.as_array_like() if isinstance(as_array_like, NDArrayLike): From 703192cae67c9f064604403a20056dcfb30a8d1a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 13:25:05 +0200 Subject: [PATCH 083/130] increase deadline to 500ms --- tests/test_properties.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_properties.py b/tests/test_properties.py index 68427dd8fe..2809e9564b 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -75,7 +75,7 @@ def deep_equal(a: Any, b: Any) -> bool: return a 
== b -@settings(deadline=300) +@settings(deadline=500) @given(data=st.data(), zarr_format=zarr_formats) def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None: nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) From 0fab5e514f1840fb29819e0fe20a89de2cde7af1 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 13:40:20 +0200 Subject: [PATCH 084/130] fewer commented sections of problematic lru_store_cache section of the sharding codecs --- src/zarr/codecs/sharding.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 779cbc6f88..5c08815979 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -357,10 +357,13 @@ def __init__( object.__setattr__(self, "index_location", index_location_parsed) # Use instance-local lru_cache to avoid memory leaks - # TODO: fix these when we don't get hashability errors for certain numpy dtypes + + # numpy void scalars are not hashable, which means an array spec with a fill value that is + # a numpy void scalar will break the lru_cache. 
This is commented for now but should be + # fixed # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) - # object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) - # object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) + object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) + object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) # todo: typedict return type def __getstate__(self) -> dict[str, Any]: @@ -374,7 +377,7 @@ def __setstate__(self, state: dict[str, Any]) -> None: object.__setattr__(self, "index_location", parse_index_location(config["index_location"])) # Use instance-local lru_cache to avoid memory leaks - object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) + # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) From 2f945bf25186a6fa8401348565025d3e374023a2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 13:56:49 +0200 Subject: [PATCH 085/130] add link to gh issue about lru_cache for sharding codec --- src/zarr/codecs/sharding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 5c08815979..15036e88d2 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -360,7 +360,7 @@ def __init__( # numpy void scalars are not hashable, which means an array spec with a fill value that is # a numpy void scalar will break the lru_cache. This is commented for now but should be - # fixed + # fixed. 
See https://github.com/zarr-developers/zarr-python/issues/3054 # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) object.__setattr__(self, "_get_index_chunk_spec", lru_cache()(self._get_index_chunk_spec)) object.__setattr__(self, "_get_chunks_per_shard", lru_cache()(self._get_chunks_per_shard)) From 63a6af4392c47802ae8ec5f026cae5a1503c3ddd Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 14:28:22 +0200 Subject: [PATCH 086/130] attempt to speed up hypothesis tests by reducing max array size --- src/zarr/testing/strategies.py | 4 +++- tests/test_properties.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 6c3abfca85..4e5c9536fc 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -120,7 +120,9 @@ def clear_store(x: Store) -> Store: compressors = st.sampled_from([None, "default"]) zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([3, 2]) # We de-prioritize arrays having dim sizes 0, 1, 2 -array_shapes = npst.array_shapes(max_dims=4, min_side=3) | npst.array_shapes(max_dims=4, min_side=0) +array_shapes = npst.array_shapes(max_dims=4, min_side=3, max_side=5) | npst.array_shapes( + max_dims=4, min_side=0 +) @st.composite diff --git a/tests/test_properties.py b/tests/test_properties.py index 2809e9564b..15dd701582 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -75,7 +75,6 @@ def deep_equal(a: Any, b: Any) -> bool: return a == b -@settings(deadline=500) @given(data=st.data(), zarr_format=zarr_formats) def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None: nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) From 56e7c84abd57203146c9d6f8dc9b62e1bf80dac1 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 15:57:30 +0200 Subject: [PATCH 087/130] clean up docs --- docs/user-guide/data_types.rst | 116 
+++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 36 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index a281b349de..81a09a6485 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -5,8 +5,10 @@ Zarr's data type model ---------------------- Every Zarr array has a "data type", which defines the meaning and physical layout of the -array's elements. Zarr is heavily influenced by `NumPy `_, and -Zarr-Python supports creating arrays with Numpy data types:: +array's elements. As Zarr Python is tightly integrated with `NumPy `_, +it's easy to create arrays with NumPy data types: + +.. code-block:: python >>> import zarr >>> import numpy as np @@ -14,58 +16,103 @@ Zarr-Python supports creating arrays with Numpy data types:: >>> z -Unlike Numpy arrays, Zarr arrays are designed to be persisted to storage and read by Zarr implementations in different programming languages. -This means Zarr data types must be interpreted correctly when clients read an array. So each Zarr data type defines a procedure for -encoding/decoding that data type to/from Zarr array metadata, and also encoding/decoding **instances** of that data type to/from -array metadata. These serialization procedures depend on the Zarr format. +Unlike NumPy arrays, Zarr arrays are designed to accessed by Zarr +implementations in different programming languages. This means Zarr data types must be interpreted +correctly when clients read an array. Each Zarr data type defines procedures for +encoding and decoding both the data type itself, and scalars from that data type to and from Zarr array metadata. And these serialization procedures +depend on the Zarr format. Data types in Zarr version 2 ----------------------------- -Version 2 of the Zarr format defined its data types relative to `Numpy's data types `_, and added a few non-Numpy data types as well. 
-Thus the JSON identifier for a Numpy-compatible data type is just the Numpy ``str`` attribute of that dtype:: +Version 2 of the Zarr format defined its data types relative to +`NumPy's data types `_, +and added a few non-NumPy data types as well. Thus the JSON identifier for a NumPy-compatible data +type is just the NumPy ``str`` attribute of that data type: + +.. code-block:: python - >>> import zarr - >>> import numpy as np - >>> import json - >>> store = {} - >>> np_dtype = np.dtype('int64') - >>> z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) - >>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] - >>> assert dtype_meta == np_dtype.str # True - >>> dtype_meta - '>> import zarr + >>> import numpy as np + >>> import json + >>> + >>> store = {} + >>> np_dtype = np.dtype('int64') + >>> z = zarr.create_array(store=store, shape=(1,), dtype=np_dtype, zarr_format=2) + >>> dtype_meta = json.loads(store['.zarray'].to_bytes())["dtype"] + >>> dtype_meta + '>> assert dtype_meta == np_dtype.str .. note:: - The ``<`` character in the data type metadata encodes the `endianness `_, or "byte order", of the data type. Following Numpy's example, - in Zarr version 2 each data type has an endianness where applicable. However, Zarr version 3 data types do not store endianness information. + The ``<`` character in the data type metadata encodes the + `endianness `_, + or "byte order", of the data type. Following NumPy's example, + in Zarr version 2 each data type has an endianness where applicable. + However, Zarr version 3 data types do not store endianness information. + +In addition to defining a representation of the data type itself (which in the example above was +just a simple string ``"i2`` **or** ``M[10s]"`` in + Zarr V2. This is more compact, but can be harder to parse. + +For more about data types in Zarr V3, see the +`V3 specification `_. 
+ +Data types in Zarr Python ------------------------- -The two Zarr formats that Zarr-Python supports specify data types in two different ways: -data types in Zarr version 2 are encoded as Numpy-compatible strings, while data types in Zarr version +The two Zarr formats that Zarr Python supports specify data types in two different ways: +data types in Zarr version 2 are encoded as NumPy-compatible strings, while data types in Zarr version 3 are encoded as either strings or ``JSON`` objects, and the Zarr V3 data types don't have any associated endianness information, unlike Zarr V2 data types. -To abstract over these syntactical and semantic differences, Zarr-Python uses a class called `ZDType <../api/zarr/dtype/index.html#zarr.dtype.ZDType>`_ to wrap native data types (e.g., Numpy data types) and provide Zarr V2 and Zarr V3 compatibility routines. -Each data type supported by Zarr-Python is modeled by a subclass of ``ZDType``, which provides an API for the following operations: +To abstract over these syntactical and semantic differences, Zarr Python uses a class called +`ZDType <../api/zarr/dtype/index.html#zarr.dtype.ZDType>`_ provide Zarr V2 and Zarr V3 compatibility +routines for ""native" data types. In this context, a "native" data type is a Python class, +typically defined in another library, that models an array's data type. For example, ``np.uint8`` is a native +data type defined in NumPy, which Zarr Python wraps with a ``ZDType`` instance called +`UInt8 <../api/zarr/dtype/index.html#zarr.dtype.ZDType>`_. + +Each data type supported by Zarr Python is modeled by ``ZDType`` subclass, which provides an +API for the following operations: - Wrapping / unwrapping a native data type - Encoding / decoding a data type to / from Zarr V2 and Zarr V3 array metadata. @@ -104,7 +151,4 @@ Example Usage Custom Data Types ~~~~~~~~~~~~~~~~~ -Users can define custom data types by subclassing `ZDType` and implementing the required methods. 
-Once defined, the custom data type can be registered with Zarr-Python to enable seamless integration with the library. - \ No newline at end of file From eee0d7bc8c7b8444a3bcd9e9a545c4f89db5dcb2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 16:01:52 +0200 Subject: [PATCH 088/130] remove placeholder --- docs/user-guide/data_types.rst | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 81a09a6485..ff43dd8d19 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -147,8 +147,3 @@ Example Usage # Deserialize a scalar value scalar_value = int8.from_json_value(42, zarr_format=3) assert scalar_value == np.int8(42) - -Custom Data Types -~~~~~~~~~~~~~~~~~ - - \ No newline at end of file From 1dc8e722b80e4f6668ab9121b0370dde84fc5ba4 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 16:36:07 +0200 Subject: [PATCH 089/130] make final example section doctested and more readable --- docs/user-guide/data_types.rst | 58 +++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index ff43dd8d19..777a69816e 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -9,7 +9,6 @@ array's elements. As Zarr Python is tightly integrated with `NumPy >> import zarr >>> import numpy as np >>> z = zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) @@ -122,28 +121,51 @@ API for the following operations: Example Usage ~~~~~~~~~~~~~ +Create a ``ZDType`` from a native data type: + +.. code-block:: python + + >>> from zarr.core.dtype import Int8 + >>> import numpy as np + >>> int8 = Int8.from_dtype(np.dtype('int8')) + +Convert back to native data type: + +.. 
code-block:: python + + >>> native_dtype = int8.to_dtype() + >>> assert native_dtype == np.dtype('int8') + +Get the default scalar value for the data type: + .. code-block:: python - from zarr.core.dtype.wrapper import Int8 + >>> default_value = int8.default_value() + >>> assert default_value == np.int8(0) - # Create a ZDType instance from a native dtype - int8 = Int8.from_dtype(np.dtype('int8')) - # Convert back to native dtype - native_dtype = int8.to_dtype() - assert native_dtype == np.dtype('int8') +Serialize to JSON for Zarr V2 and V3 - # Get the default value - default_value = int8.default_value() - assert default_value == np.int8(0) +.. code-block:: python - # Serialize to JSON - json_representation = int8.to_json(zarr_format=3) + >>> json_v2 = int8.to_json(zarr_format=2) + >>> json_v2 + '|i1' + >>> json_v3 = int8.to_json(zarr_format=3) + >>> json_v3 + 'int8' - # Serialize a scalar value - json_value = int8.to_json_value(42, zarr_format=3) - assert json_value == 42 +Serialize a scalar value to JSON: + +.. code-block:: python + + >>> json_value = int8.to_json_value(42, zarr_format=3) + >>> json_value + 42 + +Deserialize a scalar value from JSON: + +.. 
code-block:: python - # Deserialize a scalar value - scalar_value = int8.from_json_value(42, zarr_format=3) - assert scalar_value == np.int8(42) + >>> scalar_value = int8.from_json_value(42, zarr_format=3) + >>> assert scalar_value == np.int8(42) From 13ca2304b1d9e3cf576bd53c367f03408d9f5653 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 16:40:57 +0200 Subject: [PATCH 090/130] revert change to auto chunking --- docs/user-guide/performance.rst | 2 +- src/zarr/core/chunk_grids.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/performance.rst b/docs/user-guide/performance.rst index 40882fbf1f..aa380735d5 100644 --- a/docs/user-guide/performance.rst +++ b/docs/user-guide/performance.rst @@ -52,7 +52,7 @@ a chunk shape is based on simple heuristics and may be far from optimal. E.g.:: >>> z4 = zarr.create_array(store={}, shape=(10000, 10000), chunks='auto', dtype='int32') >>> z4.chunks - (313, 625) + (625, 625) If you know you are always going to be loading the entire array into memory, you can turn off chunks by providing ``chunks`` equal to ``shape``, in which case there diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 74bf9b6ba8..6701aca182 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -63,7 +63,7 @@ def _guess_chunks( """ if isinstance(shape, int): shape = (shape,) - typesize = max(typesize, 8) + typesize = max(typesize, 1) ndims = len(shape) # require chunks to have non-zero length for all dimensions chunks = np.maximum(np.array(shape, dtype="=f8"), 1) From 2a42205ad3ae8eae3ecf4b7a76189c50335ae9a7 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 16:46:21 +0200 Subject: [PATCH 091/130] revert quotation of literal type --- src/zarr/core/array_spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index 279bf6edf0..5d4321da82 100644 --- 
a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -63,7 +63,7 @@ def from_dict(cls, data: ArrayConfigParams) -> Self: """ kwargs_out: ArrayConfigParams = {} for f in fields(ArrayConfig): - field_name = cast("Literal['order', 'write_empty_chunks']", f.name) + field_name = cast(Literal["order", "write_empty_chunks"], f.name) if field_name not in data: kwargs_out[field_name] = zarr_config.get(f"array.{field_name}") else: From 3f775c83665b24bbb7683393f381523c61a4e8cb Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 18:17:55 +0200 Subject: [PATCH 092/130] lint --- src/zarr/core/dtype/__init__.py | 52 ++++++++++++++++++++++++--------- src/zarr/core/dtype/npy/time.py | 8 ++--- tests/test_config.py | 6 +++- tests/test_dtype/conftest.py | 2 +- tests/test_dtype/test_dtype.py | 0 tests/test_dtype_registry.py | 35 ++++++++++++++++++++-- 6 files changed, 81 insertions(+), 22 deletions(-) delete mode 100644 tests/test_dtype/test_dtype.py diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 1a18849a13..a8cdfc0cbc 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, TypeAlias, get_args +from typing import TYPE_CHECKING, TypeAlias from zarr.core.dtype.common import DataTypeValidationError from zarr.core.dtype.npy.bool import Bool @@ -30,8 +30,10 @@ from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType __all__ = [ + "Bool", "Complex64", "Complex128", + "DataTypeRegistry", "DataTypeValidationError", "DateTime64", "FixedLengthAscii", @@ -45,6 +47,8 @@ "Int32", "Int64", "Structured", + "TBaseDType", + "TBaseScalar", "TimeDelta64", "TimeDelta64", "UInt8", @@ -59,25 +63,47 @@ data_type_registry = DataTypeRegistry() -INTEGER_DTYPE = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 -FLOAT_DTYPE = Float16 | Float32 | Float64 -COMPLEX_DTYPE = Complex64 | Complex128 
-STRING_DTYPE = FixedLengthUnicode | VariableLengthString | FixedLengthAscii -DTYPE = ( +IntegerDType = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 +INTEGER_DTYPE = Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 + +FloatDType = Float16 | Float32 | Float64 +FLOAT_DTYPE = Float16, Float32, Float64 + +ComplexFloatDType = Complex64 | Complex128 +COMPLEX_FLOAT_DTYPE = Complex64, Complex128 + +StringDType = FixedLengthUnicode | VariableLengthString | FixedLengthAscii +STRING_DTYPE = FixedLengthUnicode, VariableLengthString, FixedLengthAscii + +TimeDType = DateTime64 | TimeDelta64 +TIME_DTYPE = DateTime64, TimeDelta64 + +AnyDType = ( Bool - | INTEGER_DTYPE - | FLOAT_DTYPE - | COMPLEX_DTYPE - | STRING_DTYPE + | IntegerDType + | FloatDType + | ComplexFloatDType + | StringDType | FixedLengthBytes | Structured - | DateTime64 - | TimeDelta64 + | TimeDType +) +# mypy has trouble inferring the type of variablelengthstring dtype, because its class definition +# depends on the installed numpy version. That's why the type: ignore statement is needed here. +ANY_DTYPE: tuple[type[ZDType[TBaseDType, TBaseScalar]], ...] = ( # type: ignore[assignment] + Bool, + *INTEGER_DTYPE, + *FLOAT_DTYPE, + *COMPLEX_FLOAT_DTYPE, + *STRING_DTYPE, + FixedLengthBytes, + Structured, + *TIME_DTYPE, ) ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | dict[str, JSON] -for dtype in get_args(DTYPE): +for dtype in ANY_DTYPE: data_type_registry.register(dtype._zarr_v3_name, dtype) diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index bbdd41d13f..ea44d76b56 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -173,7 +173,7 @@ class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], Has unit for ``TimeDelta64`` is optional. 
""" - dtype_cls = np.dtypes.TimeDelta64DType + dtype_cls = np.dtypes.TimeDelta64DType # type: ignore[assignment] _zarr_v3_name = "numpy.timedelta64" _zarr_v2_names = (">m8", " np.timedelta64: def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: if check_json_int(data) or data == "NaT": - return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") + return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover def _cast_value_unsafe(self, value: object) -> np.timedelta64: @@ -220,7 +220,7 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: @dataclass(frozen=True, kw_only=True, slots=True) class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): - dtype_cls = np.dtypes.DateTime64DType + dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] _zarr_v3_name = "numpy.datetime64" _zarr_v2_names = (">M8", " np.datetime64: def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: if check_json_int(data) or data == "NaT": - return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") + return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] raise TypeError(f"Invalid type: {data}. 
Expected an integer.") # pragma: no cover def _cast_value_unsafe(self, value: object) -> np.datetime64: diff --git a/tests/test_config.py b/tests/test_config.py index f32b3e6840..58f88ec806 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,6 +1,6 @@ import os from collections.abc import Iterable -from typing import Any +from typing import TYPE_CHECKING, Any from unittest import mock from unittest.mock import Mock @@ -46,6 +46,9 @@ TestNDArrayLike, ) +if TYPE_CHECKING: + from zarr.core.dtype.wrapper import ZDType + def test_config_defaults_set() -> None: # regression test for available defaults @@ -307,6 +310,7 @@ async def test_default_codecs(dtype_category: str) -> None: """ Test that the default compressors are sensitive to the current setting of the config. """ + zdtype: ZDType[Any, Any] if dtype_category == "variable-length-string": zdtype = VariableLengthString() else: diff --git a/tests/test_dtype/conftest.py b/tests/test_dtype/conftest.py index 9c7825c0d1..bf58a17556 100644 --- a/tests/test_dtype/conftest.py +++ b/tests/test_dtype/conftest.py @@ -24,7 +24,7 @@ def pytest_generate_tests(metafunc: Any) -> None: """ - pytest hook to parametrize class-scoped fixtures. + This is a pytest hook to parametrize class-scoped fixtures. This hook allows us to define class-scoped fixtures as class attributes and then generate the parametrize calls for pytest. 
This allows the fixtures to be diff --git a/tests/test_dtype/test_dtype.py b/tests/test_dtype/test_dtype.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index 98380b86f7..aaca2f0862 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -11,16 +11,20 @@ import zarr from zarr.core.config import config from zarr.core.dtype import ( - DTYPE, + AnyDType, Bool, + DataTypeRegistry, + DateTime64, FixedLengthUnicode, + Int8, + Int16, TBaseDType, TBaseScalar, ZDType, data_type_registry, get_data_type_from_json, + parse_data_type, ) -from zarr.core.dtype.registry import DataTypeRegistry if TYPE_CHECKING: from collections.abc import Generator @@ -117,7 +121,7 @@ def test_match_dtype_unique( that excludes the data type class being tested, and ensure that an instance of the wrapped data type fails to match anything in the registry """ - for _cls in get_args(DTYPE): + for _cls in get_args(AnyDType): if _cls is not type(zdtype): data_type_registry_fixture.register(_cls._zarr_v3_name, _cls) @@ -156,3 +160,28 @@ def test_entrypoint_dtype(zarr_format: ZarrFormat) -> None: instance = TestDataType() dtype_json = instance.to_json(zarr_format=zarr_format) assert get_data_type_from_json(dtype_json, zarr_format=zarr_format) == instance + + +@pytest.mark.parametrize( + ("dtype_params", "expected", "zarr_format"), + [ + ("int8", Int8(), 3), + (Int8(), Int8(), 3), + (">i2", Int16(endianness="big"), 2), + ("datetime64[10s]", DateTime64(unit="s", scale_factor=10), 2), + ( + {"name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 10}}, + DateTime64(unit="s", scale_factor=10), + 3, + ), + ], +) +def test_parse_data_type( + dtype_params: Any, expected: ZDType[Any, Any], zarr_format: ZarrFormat +) -> None: + """ + Test that parse_data_type accepts alternative representations of ZDType instances, and resolves + those inputs to the expected ZDType instance. 
+ """ + observed = parse_data_type(dtype_params, zarr_format=zarr_format) + assert observed == expected From b525b8e53257982ae238172b2b3d82a755969fd1 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 18:34:14 +0200 Subject: [PATCH 093/130] fix broken code block --- docs/user-guide/data_types.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 777a69816e..a4d8314a5e 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -9,6 +9,7 @@ array's elements. As Zarr Python is tightly integrated with `NumPy >> import zarr >>> import numpy as np >>> z = zarr.create_array(store={}, shape=(10,), dtype=np.dtype('uint8')) From ec94878746679070f59094ca5ac293831ef5fd5c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 18:57:02 +0200 Subject: [PATCH 094/130] specialize test to handle stringdtype changes coming in numpy 2.3 --- tests/test_array.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/test_array.py b/tests/test_array.py index dc81c7ea36..3108332201 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -47,6 +47,7 @@ from zarr.core.dtype.npy.sized import ( Structured, ) +from zarr.core.dtype.npy.string import VariableLengthString from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 from zarr.core.dtype.wrapper import ZDType from zarr.core.group import AsyncGroup @@ -996,14 +997,26 @@ def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFor # Structured dtypes do not have a numpy string representation that uniquely identifies them if not isinstance(dtype, Structured): - c = zarr.create_array( - store, - name="c", - shape=(5,), - chunks=(5,), - dtype=dtype.to_dtype().str, - zarr_format=zarr_format, - ) + if isinstance(dtype, VariableLengthString): + # in numpy 2.3, StringDType().str becomes the string 'StringDType()' which numpy + # does 
not accept as a string representation of the dtype. + c = zarr.create_array( + store, + name="c", + shape=(5,), + chunks=(5,), + dtype=dtype.to_dtype().char, + zarr_format=zarr_format, + ) + else: + c = zarr.create_array( + store, + name="c", + shape=(5,), + chunks=(5,), + dtype=dtype.to_dtype().str, + zarr_format=zarr_format, + ) assert a.dtype == c.dtype @staticmethod From 3af98aa1c872a99e92d44de84a9fed7e504ef0cd Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 13 May 2025 21:56:57 +0200 Subject: [PATCH 095/130] add docstring to _TestZDType class --- tests/test_dtype/test_wrapper.py | 34 ++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/test_dtype/test_wrapper.py b/tests/test_dtype/test_wrapper.py index ddf43524e0..608e272690 100644 --- a/tests/test_dtype/test_wrapper.py +++ b/tests/test_dtype/test_wrapper.py @@ -23,6 +23,40 @@ def test_schema(self, schema: json_schema.Schema) -> None: class _TestZDType: + """ + A base class for testing ZDType subclasses. This class works in conjunction with the custom + pytest collection function ``pytest_generate_tests`` defined in conftest.py, which applies the + following procedure when generating tests: + + At test generation time, for each test fixture referenced by a method on this class + pytest will look for an attribute with the same name as that fixture. Pytest will assume that + this class attribute is a tuple of values to be used for generating a parametrized test fixture. + + This means that child classes can, by using different values for these class attributes, have + customized test parametrization. + + Attributes + ---------- + test_cls : type[ZDType[TBaseDType, TBaseScalar]] + The ZDType subclass being tested. + scalar_type : ClassVar[type[TBaseScalar]] + The expected scalar type for the ZDType. + valid_dtype : ClassVar[tuple[TBaseDType, ...]] + A tuple of valid numpy dtypes for the ZDType. 
+ invalid_dtype : ClassVar[tuple[TBaseDType, ...]] + A tuple of invalid numpy dtypes for the ZDType. + valid_json_v2 : ClassVar[tuple[str | dict[str, object] | list[object], ...]] + A tuple of valid JSON representations for Zarr format version 2. + invalid_json_v2 : ClassVar[tuple[str | dict[str, object] | list[object], ...]] + A tuple of invalid JSON representations for Zarr format version 2. + valid_json_v3 : ClassVar[tuple[str | dict[str, object], ...]] + A tuple of valid JSON representations for Zarr format version 3. + invalid_json_v3 : ClassVar[tuple[str | dict[str, object], ...]] + A tuple of invalid JSON representations for Zarr format version 3. + cast_value_params : ClassVar[tuple[tuple[Any, Any, Any], ...]] + A tuple of (dtype, value, expected) tuples for testing ZDType.cast_value. + """ + test_cls: type[ZDType[TBaseDType, TBaseScalar]] scalar_type: ClassVar[type[TBaseScalar]] valid_dtype: ClassVar[tuple[TBaseDType, ...]] = () From d8c3672c4499148bb25c61a06ba20c0d7492a1d3 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 15 May 2025 12:52:58 +0200 Subject: [PATCH 096/130] type hints --- src/zarr/core/dtype/__init__.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index a8cdfc0cbc..b973691f0f 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, TypeAlias +from typing import TYPE_CHECKING, Final, TypeAlias from zarr.core.dtype.common import DataTypeValidationError from zarr.core.dtype.npy.bool import Bool @@ -64,19 +64,19 @@ data_type_registry = DataTypeRegistry() IntegerDType = Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 -INTEGER_DTYPE = Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 +INTEGER_DTYPE: Final = Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 FloatDType = Float16 | Float32 | 
Float64 -FLOAT_DTYPE = Float16, Float32, Float64 +FLOAT_DTYPE: Final = Float16, Float32, Float64 ComplexFloatDType = Complex64 | Complex128 -COMPLEX_FLOAT_DTYPE = Complex64, Complex128 +COMPLEX_FLOAT_DTYPE: Final = Complex64, Complex128 StringDType = FixedLengthUnicode | VariableLengthString | FixedLengthAscii -STRING_DTYPE = FixedLengthUnicode, VariableLengthString, FixedLengthAscii +STRING_DTYPE: Final = FixedLengthUnicode, VariableLengthString, FixedLengthAscii TimeDType = DateTime64 | TimeDelta64 -TIME_DTYPE = DateTime64, TimeDelta64 +TIME_DTYPE: Final = DateTime64, TimeDelta64 AnyDType = ( Bool @@ -90,7 +90,7 @@ ) # mypy has trouble inferring the type of variablelengthstring dtype, because its class definition # depends on the installed numpy version. That's why the type: ignore statement is needed here. -ANY_DTYPE: tuple[type[ZDType[TBaseDType, TBaseScalar]], ...] = ( # type: ignore[assignment] +ANY_DTYPE: Final = ( Bool, *INTEGER_DTYPE, *FLOAT_DTYPE, @@ -101,10 +101,12 @@ *TIME_DTYPE, ) -ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | dict[str, JSON] +# This type models inputs that can be coerced to a ZDType +ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | dict[str, JSON] | str for dtype in ANY_DTYPE: - data_type_registry.register(dtype._zarr_v3_name, dtype) + # mypy does not know that all the elements of ANY_DTYPE are subclasses of ZDType + data_type_registry.register(dtype._zarr_v3_name, dtype) # type: ignore[arg-type] # TODO: find a better name for this function From d8a382a167c0eaf043a158737b6c4aa5c6a252a7 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 16 May 2025 12:21:33 +0200 Subject: [PATCH 097/130] expand changelog --- changes/2874.feature.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/changes/2874.feature.rst b/changes/2874.feature.rst index 50634e5395..4c50532ae0 100644 --- a/changes/2874.feature.rst +++ b/changes/2874.feature.rst @@ -1,2 +1,9 @@ 
-Adds zarr-specific data type classes. This replaces the direct use of numpy data types for zarr -v2 and a fixed set of string enums for zarr v3. For more on this new feature, see the `documentation `_ \ No newline at end of file +Adds zarr-specific data type classes. This replaces the internal use of numpy data types for zarr +v2 and a fixed set of string enums for zarr v3. This change is largely internal, but it does +change the type of the ``dtype`` and ``data_type`` fields on the ``ArrayV2Metadata`` and +``ArrayV3Metadata`` classes. It also changes the JSON metadata representation of the +variable-length string data type, but the old metadata representation can still be +used when reading arrays. The logic for automatically choosing the chunk encoding for a given data +type has also changed, and this necessitated changes to the ``config`` API. + +For more on this new feature, see the `documentation `_ \ No newline at end of file From 9aa751b170b83f92cc19e87bc44cddfa0bd9a7de Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 16 May 2025 18:50:33 +0200 Subject: [PATCH 098/130] tweak docstring --- src/zarr/core/dtype/wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 199cbda5d8..3a56a85788 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -55,7 +55,7 @@ class ZDType(Generic[TDType_co, TScalar_co], ABC): Attributes ---------- dtype_cls : ClassVar[type[TDType]] - The numpy dtype class. This is a class variable. Instances of this class cannot set it. + The wrapped dtype class. This is a class variable. Instances of this class cannot set it. _zarr_v3_name : ClassVar[str] The name given to the wrapped data type by a zarr v3 data type specification. 
Note that this is not necessarily the same name that will appear in metadata documents, as some data types From e4a0372849b04d908a4a4e1487345615f96bed7d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 19 May 2025 12:36:55 +0200 Subject: [PATCH 099/130] support v3 nan strings in JSON for float dtypes --- src/zarr/core/dtype/common.py | 8 +- src/zarr/core/dtype/npy/common.py | 198 +++++------------------ src/zarr/core/dtype/npy/complex.py | 33 ++-- src/zarr/core/dtype/npy/float.py | 49 ++++-- src/zarr/core/metadata/v3.py | 6 +- tests/test_dtype/test_npy/test_common.py | 108 ++++++------- tests/test_dtype/test_npy/test_float.py | 15 ++ tests/test_metadata/test_v3.py | 11 +- 8 files changed, 180 insertions(+), 248 deletions(-) diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 4249c57b1f..ecc475192c 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -1,11 +1,13 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Literal +from typing import Final, Literal Endianness = Literal["little", "big"] -SpecialFloats = Literal["NaN", "Infinity", "-Infinity"] -JSONFloat = float | SpecialFloats +SpecialFloatStrings = Literal["NaN", "Infinity", "-Infinity"] +SPECIAL_FLOAT_STRINGS: Final = ("NaN", "Infinity", "-Infinity") +JSONFloatV2 = float | SpecialFloatStrings +JSONFloatV3 = float | SpecialFloatStrings | str class DataTypeValidationError(ValueError): ... 
diff --git a/src/zarr/core/dtype/npy/common.py b/src/zarr/core/dtype/npy/common.py index 8033e48291..2481dcb150 100644 --- a/src/zarr/core/dtype/npy/common.py +++ b/src/zarr/core/dtype/npy/common.py @@ -1,6 +1,7 @@ from __future__ import annotations import base64 +import struct import sys from collections.abc import Sequence from typing import ( @@ -18,7 +19,7 @@ import numpy as np -from zarr.core.dtype.common import Endianness, JSONFloat +from zarr.core.dtype.common import SPECIAL_FLOAT_STRINGS, Endianness, JSONFloatV2, JSONFloatV3 if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @@ -112,7 +113,7 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: ) -def float_from_json_v2(data: JSONFloat) -> float: +def float_from_json_v2(data: JSONFloatV2) -> float: """ Convert a JSON float to a float (Zarr v2). @@ -137,7 +138,7 @@ def float_from_json_v2(data: JSONFloat) -> float: return float(data) -def float_from_json_v3(data: JSONFloat) -> float: +def float_from_json_v3(data: JSONFloatV3) -> float: """ Convert a JSON float to a float (v3). @@ -150,31 +151,35 @@ def float_from_json_v3(data: JSONFloat) -> float: ------- float The float value. - """ - # todo: support the v3-specific NaN handling - return float_from_json_v2(data) - -def float_from_json(data: JSONFloat, *, zarr_format: ZarrFormat) -> float: - """ - Convert a JSON float to a float based on zarr format. - - Parameters - ---------- - data : JSONFloat - The JSON float to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - float - The float value. - """ - if zarr_format == 2: - return float_from_json_v2(data) - else: - return float_from_json_v3(data) + Notes + ----- + Zarr V3 allows floats to be stored as hex strings. To quote the spec: + "...for float32, "NaN" is equivalent to "0x7fc00000". + This representation is the only way to specify a NaN value other than the specific NaN value + denoted by "NaN"." 
+ """ + + if isinstance(data, str): + if data in SPECIAL_FLOAT_STRINGS: + return float_from_json_v2(data) # type: ignore[arg-type] + if not data.startswith("0x"): + msg = ( + f"Invalid float value: {data!r}. Expected a string starting with the hex prefix" + " '0x', or one of 'NaN', 'Infinity', or '-Infinity'." + ) + raise ValueError(msg) + if len(data[2:]) == 4: + dtype_code = ">e" + elif len(data[2:]) == 8: + dtype_code = ">f" + elif len(data[2:]) == 16: + dtype_code = ">d" + else: + msg = f"Invalid float value: {data!r}. Expected a string of length 4, 8, or 16." + raise ValueError(msg) + return float(struct.unpack(dtype_code, bytes.fromhex(data[2:]))[0]) + return float_from_json_v2(data) def bytes_from_json(data: str, *, zarr_format: ZarrFormat) -> bytes: @@ -221,7 +226,7 @@ def bytes_to_json(data: bytes, zarr_format: ZarrFormat) -> str: return base64.b64encode(data).decode("ascii") -def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: +def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloatV2: """ Convert a float to JSON (v2). @@ -242,7 +247,7 @@ def float_to_json_v2(data: float | np.floating[Any]) -> JSONFloat: return float(data) -def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: +def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloatV3: """ Convert a float to JSON (v3). @@ -261,32 +266,9 @@ def float_to_json_v3(data: float | np.floating[Any]) -> JSONFloat: return float_to_json_v2(data) -def float_to_json(data: float | np.floating[Any], *, zarr_format: ZarrFormat) -> JSONFloat: - """ - Convert a float to JSON, parametrized by the zarr format version. - - Parameters - ---------- - data : float | np.floating - The float value to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - JSONFloat - The JSON representation of the float. 
- """ - if zarr_format == 2: - return float_to_json_v2(data) - else: - return float_to_json_v3(data) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") - - def complex_float_to_json_v3( data: complex | np.complexfloating[Any, Any], -) -> tuple[JSONFloat, JSONFloat]: +) -> tuple[JSONFloatV3, JSONFloatV3]: """ Convert a complex number to JSON as defined by the Zarr V3 spec. @@ -305,7 +287,7 @@ def complex_float_to_json_v3( def complex_float_to_json_v2( data: complex | np.complexfloating[Any, Any], -) -> tuple[JSONFloat, JSONFloat]: +) -> tuple[JSONFloatV2, JSONFloatV2]: """ Convert a complex number to JSON as defined by the Zarr V2 spec. @@ -322,32 +304,7 @@ def complex_float_to_json_v2( return float_to_json_v2(data.real), float_to_json_v2(data.imag) -def complex_float_to_json( - data: complex | np.complexfloating[Any, Any], *, zarr_format: ZarrFormat -) -> tuple[JSONFloat, JSONFloat]: - """ - Convert a complex number to JSON, parametrized by the zarr format version. - - Parameters - ---------- - data : complex | np.complexfloating - The complex value to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - tuple[JSONFloat, JSONFloat] or JSONFloat - The JSON representation of the complex number. - """ - if zarr_format == 2: - return complex_float_to_json_v2(data) - else: - return complex_float_to_json_v3(data) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") - - -def complex_float_from_json_v2(data: tuple[JSONFloat, JSONFloat]) -> complex: +def complex_float_from_json_v2(data: tuple[JSONFloatV2, JSONFloatV2]) -> complex: """ Convert a JSON complex float to a complex number (v2). 
@@ -364,7 +321,7 @@ def complex_float_from_json_v2(data: tuple[JSONFloat, JSONFloat]) -> complex: return complex(float_from_json_v2(data[0]), float_from_json_v2(data[1])) -def complex_float_from_json_v3(data: tuple[JSONFloat, JSONFloat]) -> complex: +def complex_float_from_json_v3(data: tuple[JSONFloatV3, JSONFloatV3]) -> complex: """ Convert a JSON complex float to a complex number (v3). @@ -381,30 +338,7 @@ def complex_float_from_json_v3(data: tuple[JSONFloat, JSONFloat]) -> complex: return complex(float_from_json_v3(data[0]), float_from_json_v3(data[1])) -def complex_float_from_json(data: tuple[JSONFloat, JSONFloat], zarr_format: ZarrFormat) -> complex: - """ - Convert a JSON complex float to a complex number based on zarr format. - - Parameters - ---------- - data : tuple[JSONFloat, JSONFloat] - The JSON complex float to convert. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - np.complexfloating - The complex number. - """ - if zarr_format == 2: - return complex_float_from_json_v2(data) - else: - return complex_float_from_json_v3(data) - raise ValueError(f"Invalid zarr format: {zarr_format}. Expected 2 or 3.") - - -def check_json_float_v2(data: JSON) -> TypeGuard[JSONFloat]: +def check_json_float_v2(data: JSON) -> TypeGuard[JSONFloatV2]: """ Check if a JSON value represents a float (v2). @@ -423,7 +357,7 @@ def check_json_float_v2(data: JSON) -> TypeGuard[JSONFloat]: return isinstance(data, float | int) -def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloat]: +def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloatV3]: """ Check if a JSON value represents a float (v3). @@ -437,11 +371,10 @@ def check_json_float_v3(data: JSON) -> TypeGuard[JSONFloat]: Bool True if the data is a float, False otherwise. 
""" - # TODO: handle the special JSON serialization of different NaN values - return check_json_float_v2(data) + return check_json_float_v2(data) or (isinstance(data, str) and data.startswith("0x")) -def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: +def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloatV2, JSONFloatV2]]: """ Check if a JSON value represents a complex float, as per the behavior of zarr-python 2.x @@ -464,7 +397,7 @@ def check_json_complex_float_v2(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFl ) -def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: +def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloatV3, JSONFloatV3]]: """ Check if a JSON value represents a complex float, as per the zarr v3 spec @@ -487,51 +420,6 @@ def check_json_complex_float_v3(data: JSON) -> TypeGuard[tuple[JSONFloat, JSONFl ) -def check_json_complex_float( - data: JSON, zarr_format: ZarrFormat -) -> TypeGuard[tuple[JSONFloat, JSONFloat]]: - """ - Check if a JSON value represents a complex float, given a zarr format. - - Parameters - ---------- - data : JSON - The JSON value to check. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - Bool - True if the data represents a complex float, False otherwise. - """ - if zarr_format == 2: - return check_json_complex_float_v2(data) - return check_json_complex_float_v3(data) - - -def check_json_float(data: JSON, zarr_format: ZarrFormat) -> TypeGuard[float]: - """ - Check if a JSON value represents a float based on zarr format. - - Parameters - ---------- - data : JSON - The JSON value to check. - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - Bool - True if the data is a float, False otherwise. 
- """ - if zarr_format == 2: - return check_json_float_v2(data) - else: - return check_json_float_v3(data) - - def check_json_int(data: JSON) -> TypeGuard[int]: """ Check if a JSON value is an integer. diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index fab4ca9893..3e5f640946 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -15,9 +15,12 @@ ComplexLike, TComplexDType_co, TComplexScalar_co, - check_json_complex_float, - complex_float_from_json, - complex_float_to_json, + check_json_complex_float_v2, + check_json_complex_float_v3, + complex_float_from_json_v2, + complex_float_from_json_v3, + complex_float_to_json_v2, + complex_float_to_json_v3, endianness_from_numpy_str, endianness_to_numpy_str, ) @@ -113,11 +116,19 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TComplexSca TScalar_co The numpy float. """ - if check_json_complex_float(data, zarr_format=zarr_format): - return self._cast_value_unsafe(complex_float_from_json(data, zarr_format=zarr_format)) - raise TypeError( - f"Invalid type: {data}. Expected a float or a special string encoding of a float." - ) + if zarr_format == 2: + if check_json_complex_float_v2(data): + return self._cast_value_unsafe(complex_float_from_json_v2(data)) + raise TypeError( + f"Invalid type: {data}. Expected a float or a special string encoding of a float." + ) + elif zarr_format == 3: + if check_json_complex_float_v3(data): + return self._cast_value_unsafe(complex_float_from_json_v3(data)) + raise TypeError( + f"Invalid type: {data}. Expected a float or a special string encoding of a float." 
+ ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def to_json_value(self, data: object, zarr_format: ZarrFormat) -> JSON: """ @@ -136,7 +147,11 @@ def to_json_value(self, data: object, zarr_format: ZarrFormat) -> JSON: The JSON-serializable form of the complex number, which is a list of two floats, each of which is encoding according to a zarr-format-specific encoding. """ - return complex_float_to_json(self.cast_value(data), zarr_format=zarr_format) + if zarr_format == 2: + return complex_float_to_json_v2(self.cast_value(data)) + elif zarr_format == 3: + return complex_float_to_json_v3(self.cast_value(data)) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @dataclass(frozen=True, kw_only=True) diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index bedd6a4751..e4d6e42ef3 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -10,11 +10,14 @@ FloatLike, TFloatDType_co, TFloatScalar_co, - check_json_float, + check_json_float_v2, + check_json_float_v3, endianness_from_numpy_str, endianness_to_numpy_str, - float_from_json, - float_to_json, + float_from_json_v2, + float_from_json_v3, + float_to_json_v2, + float_to_json_v3, ) from zarr.core.dtype.wrapper import TBaseDType, ZDType @@ -72,11 +75,11 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: return data == cls._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def check_value(self, value: object) -> TypeGuard[FloatLike]: - return isinstance(value, FloatLike) + def check_value(self, data: object) -> TypeGuard[FloatLike]: + return isinstance(data, FloatLike) - def _cast_value_unsafe(self, value: object) -> TFloatScalar_co: - return self.to_dtype().type(value) # type: ignore[return-value, arg-type] + def _cast_value_unsafe(self, data: object) -> TFloatScalar_co: + return self.to_dtype().type(data) 
# type: ignore[return-value, arg-type] def default_value(self) -> TFloatScalar_co: """ @@ -105,13 +108,24 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TFloatScala TScalar_co The numpy float. """ - if check_json_float(data, zarr_format=zarr_format): - return self._cast_value_unsafe(float_from_json(data, zarr_format=zarr_format)) - raise TypeError( - f"Invalid type: {data}. Expected a float or a special string encoding of a float." - ) - - def to_json_value(self, data: object, zarr_format: ZarrFormat) -> float | str: + if zarr_format == 2: + if check_json_float_v2(data): + return self._cast_value_unsafe(float_from_json_v2(data)) + else: + raise TypeError( + f"Invalid type: {data}. Expected a float or a special string encoding of a float." + ) + elif zarr_format == 3: + if check_json_float_v3(data): + return self._cast_value_unsafe(float_from_json_v3(data)) + else: + raise TypeError( + f"Invalid type: {data}. Expected a float or a special string encoding of a float." + ) + else: + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> float | str: """ Convert an object to a JSON-serializable float. @@ -128,7 +142,12 @@ def to_json_value(self, data: object, zarr_format: ZarrFormat) -> float | str: The JSON-serializable form of the float, which is potentially a number or a string. See the zarr specifications for details on the JSON encoding for floats. 
""" - return float_to_json(self._cast_value_unsafe(data), zarr_format=zarr_format) + if zarr_format == 2: + return float_to_json_v2(self._cast_value_unsafe(data)) + elif zarr_format == 3: + return float_to_json_v3(self._cast_value_unsafe(data)) + else: + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @dataclass(frozen=True, kw_only=True) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 07856a3c7c..1c62e4b41c 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -295,7 +295,11 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: data_type = get_data_type_from_json(data_type_json, zarr_format=3) # check that the fill value is consistent with the data type - fill_value_parsed = data_type.from_json_value(_data.pop("fill_value"), zarr_format=3) + try: + fill = _data.pop("fill_value") + fill_value_parsed = data_type.from_json_value(fill, zarr_format=3) + except ValueError as e: + raise TypeError(f"Invalid fill_value: {fill!r}") from e # dimension_names key is optional, normalize missing to `None` _data["dimension_names"] = _data.pop("dimension_names", None) diff --git a/tests/test_dtype/test_npy/test_common.py b/tests/test_dtype/test_npy/test_common.py index 69a14a92b0..258ab48fe1 100644 --- a/tests/test_dtype/test_npy/test_common.py +++ b/tests/test_dtype/test_npy/test_common.py @@ -9,26 +9,22 @@ import numpy as np import pytest -from zarr.core.dtype.common import Endianness, JSONFloat, SpecialFloats +from zarr.core.dtype.common import Endianness, JSONFloatV2, SpecialFloatStrings from zarr.core.dtype.npy.common import ( EndiannessNumpy, bytes_from_json, bytes_to_json, check_json_bool, - check_json_complex_float, check_json_complex_float_v2, check_json_complex_float_v3, - check_json_float, check_json_float_v2, check_json_float_v3, check_json_int, check_json_str, - complex_float_to_json, complex_float_to_json_v2, complex_float_to_json_v3, endianness_from_numpy_str, 
endianness_to_numpy_str, - float_from_json, float_from_json_v2, float_from_json_v3, float_to_json_v2, @@ -49,7 +45,7 @@ def nan_equal(a: object, b: object) -> bool: return a == b -json_float_v2_cases: list[tuple[JSONFloat, float | np.floating[Any]]] = [ +json_float_v2_cases: list[tuple[JSONFloatV2, float | np.floating[Any]]] = [ ("Infinity", float("inf")), ("Infinity", np.inf), ("-Infinity", float("-inf")), @@ -99,12 +95,12 @@ def test_endianness_to_numpy_str(data: str | None, expected: str) -> None: @pytest.mark.parametrize(("data", "expected"), json_float_v2_cases + [("SHOULD_ERR", "")]) -def test_float_from_json_v2(data: JSONFloat | str, expected: float | str) -> None: +def test_float_from_json_v2(data: JSONFloatV2 | str, expected: float | str) -> None: """ Test that float_from_json_v2 correctly converts a JSON string representation of a float to a float. This test also checks that an invalid string input raises a ``ValueError`` """ - if data in get_args(SpecialFloats) or isinstance(data, float): + if data in get_args(SpecialFloatStrings) or isinstance(data, float): assert nan_equal(float_from_json_v2(data), expected) # type: ignore[arg-type] else: msg = f"could not convert string to float: {data!r}" @@ -113,36 +109,25 @@ def test_float_from_json_v2(data: JSONFloat | str, expected: float | str) -> Non @pytest.mark.parametrize(("data", "expected"), json_float_v3_cases + [("SHOULD_ERR", "")]) -def test_float_from_json_v3(data: JSONFloat | str, expected: float | str) -> None: +def test_float_from_json_v3(data: JSONFloatV2 | str, expected: float | str) -> None: """ Test that float_from_json_v3 correctly converts a JSON string representation of a float to a float. 
This test also checks that an invalid string input raises a ``ValueError`` """ - if data in get_args(SpecialFloats) or isinstance(data, float): - assert nan_equal(float_from_json_v3(data), expected) # type: ignore[arg-type] + if data in get_args(SpecialFloatStrings) or isinstance(data, float): + assert nan_equal(float_from_json_v3(data), expected) else: - msg = f"could not convert string to float: {data!r}" + msg = ( + f"Invalid float value: {data!r}. Expected a string starting with the hex prefix" + " '0x', or one of 'NaN', 'Infinity', or '-Infinity'." + ) with pytest.raises(ValueError, match=msg): - float_from_json_v3(data) # type: ignore[arg-type] - - -@pytest.mark.parametrize(("data", "expected"), json_float_v2_cases) -def test_float_from_json(data: JSONFloat, expected: float | str, zarr_format: ZarrFormat) -> None: - """ - Test that float_from_json_v3 correctly converts a JSON string representation of a float to a float. - This test also checks that an invalid string input raises a ``ValueError`` - """ - observed = float_from_json(data, zarr_format=zarr_format) - if zarr_format == 2: - expected = float_from_json_v2(data) - else: - expected = float_from_json_v3(data) - assert nan_equal(observed, expected) + float_from_json_v3(data) # note the order of parameters relative to the order of the parametrized variable. @pytest.mark.parametrize(("expected", "data"), json_float_v2_cases) -def test_float_to_json_v2(data: float | np.floating[Any], expected: JSONFloat) -> None: +def test_float_to_json_v2(data: float | np.floating[Any], expected: JSONFloatV2) -> None: """ Test that floats are JSON-encoded properly for zarr v2 """ @@ -152,7 +137,7 @@ def test_float_to_json_v2(data: float | np.floating[Any], expected: JSONFloat) - # note the order of parameters relative to the order of the parametrized variable. 
@pytest.mark.parametrize(("expected", "data"), json_float_v3_cases) -def test_float_to_json_v3(data: float | np.floating[Any], expected: JSONFloat) -> None: +def test_float_to_json_v3(data: float | np.floating[Any], expected: JSONFloatV2) -> None: """ Test that floats are JSON-encoded properly for zarr v3 """ @@ -186,7 +171,9 @@ def test_bytes_to_json(zarr_format: ZarrFormat) -> None: # note the order of parameters relative to the order of the parametrized variable. @pytest.mark.parametrize(("json_expected", "float_data"), json_float_v2_cases) -def test_complex_to_json_v2(float_data: float | np.floating[Any], json_expected: JSONFloat) -> None: +def test_complex_to_json_v2( + float_data: float | np.floating[Any], json_expected: JSONFloatV2 +) -> None: """ Test that complex numbers are correctly converted to JSON in v2 format. @@ -202,7 +189,9 @@ def test_complex_to_json_v2(float_data: float | np.floating[Any], json_expected: # note the order of parameters relative to the order of the parametrized variable. @pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3_cases) -def test_complex_to_json_v3(float_data: float | np.floating[Any], json_expected: JSONFloat) -> None: +def test_complex_to_json_v3( + float_data: float | np.floating[Any], json_expected: JSONFloatV2 +) -> None: """ Test that complex numbers are correctly converted to JSON in v3 format. 
@@ -218,7 +207,7 @@ def test_complex_to_json_v3(float_data: float | np.floating[Any], json_expected: @pytest.mark.parametrize(("json_expected", "float_data"), json_float_v3_cases) def test_complex_float_to_json( - float_data: float | np.floating[Any], json_expected: JSONFloat, zarr_format: ZarrFormat + float_data: float | np.floating[Any], json_expected: JSONFloatV2, zarr_format: ZarrFormat ) -> None: """ Test that complex numbers are correctly converted to JSON in v2 or v3 formats, depending @@ -231,18 +220,27 @@ def test_complex_float_to_json( cplx = complex(float_data, float_data) cplx_npy = np.complex128(cplx) - assert complex_float_to_json(cplx, zarr_format=zarr_format) == (json_expected, json_expected) - assert complex_float_to_json(cplx_npy, zarr_format=zarr_format) == ( - json_expected, - json_expected, - ) + if zarr_format == 2: + assert complex_float_to_json_v2(cplx) == (json_expected, json_expected) + assert complex_float_to_json_v2(cplx_npy) == ( + json_expected, + json_expected, + ) + elif zarr_format == 3: + assert complex_float_to_json_v3(cplx) == (json_expected, json_expected) + assert complex_float_to_json_v3(cplx_npy) == ( + json_expected, + json_expected, + ) + else: + raise ValueError("zarr_format must be 2 or 3") # pragma: no cover -check_json_float_cases = get_args(SpecialFloats) + (1.0, 2) +check_json_float_cases = get_args(SpecialFloatStrings) + (1.0, 2) @pytest.mark.parametrize("data", check_json_float_cases) -def test_check_json_float_v2_valid(data: JSONFloat | int) -> None: +def test_check_json_float_v2_valid(data: JSONFloatV2 | int) -> None: assert check_json_float_v2(data) @@ -251,7 +249,7 @@ def test_check_json_float_v2_invalid() -> None: @pytest.mark.parametrize("data", check_json_float_cases) -def test_check_json_float_v3_valid(data: JSONFloat | int) -> None: +def test_check_json_float_v3_valid(data: JSONFloatV2 | int) -> None: assert check_json_float_v3(data) @@ -259,25 +257,15 @@ def test_check_json_float_v3_invalid() -> None: 
assert not check_json_float_v3("invalid") -@pytest.mark.parametrize("data", check_json_float_cases) -def test_check_json_float(data: JSONFloat | int, zarr_format: ZarrFormat) -> None: - observed = check_json_float(data, zarr_format=zarr_format) - if zarr_format == 2: - expected = check_json_float_v2(data) - else: - expected = check_json_float_v3(data) - assert observed == expected - - -check_json_complex_float_true_cases = ( +check_json_complex_float_true_cases: tuple[list[JSONFloatV2], ...] = ( + [0.0, 1.0], [0.0, 1.0], - (0.0, 1.0), [-1.0, "NaN"], ["Infinity", 1.0], ["Infinity", "NaN"], ) -check_json_complex_float_false_cases = ( +check_json_complex_float_false_cases: tuple[object, ...] = ( 0.0, "foo", [0.0], @@ -309,12 +297,22 @@ def test_check_json_complex_float_v3_false(data: JSON) -> None: @pytest.mark.parametrize("data", check_json_complex_float_true_cases) def test_check_json_complex_float_true(data: JSON, zarr_format: ZarrFormat) -> None: - assert check_json_complex_float(data, zarr_format=zarr_format) + if zarr_format == 2: + assert check_json_complex_float_v2(data) + elif zarr_format == 3: + assert check_json_complex_float_v3(data) + else: + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @pytest.mark.parametrize("data", check_json_complex_float_false_cases) def test_check_json_complex_float_false(data: JSON, zarr_format: ZarrFormat) -> None: - assert not check_json_complex_float(data, zarr_format=zarr_format) + if zarr_format == 2: + assert not check_json_complex_float_v2(data) + elif zarr_format == 3: + assert not check_json_complex_float_v3(data) + else: + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def test_check_json_int() -> None: diff --git a/tests/test_dtype/test_npy/test_float.py b/tests/test_dtype/test_npy/test_float.py index 5981d09514..ba43b6bcf6 100644 --- a/tests/test_dtype/test_npy/test_float.py +++ b/tests/test_dtype/test_npy/test_float.py @@ -12,6 +12,15 @@ 
def scalar_equals(self, scalar1: object, scalar2: object) -> bool: return True return super().scalar_equals(scalar1, scalar2) + hex_nan_params: tuple[str, ...] = () + + def test_hex_nan(self, hex_nan_params: str) -> None: + """ + Test that hexadecimal strings can be read as NaN values + """ + zdtype = self.test_cls() + assert np.isnan(zdtype.from_json_value(hex_nan_params, zarr_format=3)) + class TestFloat16(_BaseTestFloat): test_cls = Float16 @@ -52,6 +61,8 @@ class TestFloat16(_BaseTestFloat): (Float16(), "NaN", np.float16("NaN")), ) + hex_nan_params = ("0x7fc0", "0x7fc1") + class TestFloat32(_BaseTestFloat): test_cls = Float32 @@ -94,6 +105,8 @@ class TestFloat32(_BaseTestFloat): (Float32(), "NaN", np.float32("NaN")), ) + hex_nan_params = ("0x7fc00000", "0x7fc00001") + class TestFloat64(_BaseTestFloat): test_cls = Float64 @@ -134,3 +147,5 @@ class TestFloat64(_BaseTestFloat): (Float64(), -1.0, np.float64(-1.0)), (Float64(), "NaN", np.float64("NaN")), ) + + hex_nan_params = ("0x7ff8000000000000", "0x7ff8000000000001") diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index fa23dccf59..0d7da0153f 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -12,7 +12,6 @@ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.dtype import get_data_type_from_native_dtype -from zarr.core.dtype.npy.common import check_json_complex_float from zarr.core.dtype.npy.time import DateTime64 from zarr.core.group import GroupMetadata, parse_node_type from zarr.core.metadata.v3 import ( @@ -28,7 +27,7 @@ from typing import Any from zarr.abc.codec import Codec - from zarr.core.common import JSON, ZarrFormat + from zarr.core.common import JSON from zarr.core.metadata.v3 import ( @@ -135,14 +134,6 @@ def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: assert dtype.to_json_value(observed, zarr_format=zarr_format) == 
tuple(fill_value) -@pytest.mark.parametrize("data", [[1.0, 0.0, 3.0], [0, 1, 3], [1]]) -def test_complex_to_json_invalid(data: object, zarr_format: ZarrFormat) -> None: - assert not check_json_complex_float(data, zarr_format=zarr_format) - # match = f"Invalid type: {data}. Expected a sequence of two numbers." - # with pytest.raises(TypeError, match=re.escape(match)): - # complex_float_from_json(data=data, zarr_format=3) - - @pytest.mark.parametrize("fill_value", [{"foo": 10}]) @pytest.mark.parametrize("dtype_str", [*int_dtypes, *float_dtypes, *complex_dtypes]) def test_parse_fill_value_invalid_type(fill_value: Any, dtype_str: str) -> None: From 8a976d6797508bd4d5167e51e495bf6d9cdd4f74 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Wed, 21 May 2025 17:21:28 +0200 Subject: [PATCH 100/130] revert removal of metadata chunk grid attribute --- src/zarr/core/array.py | 58 +++++++++++------------------------- src/zarr/core/metadata/v2.py | 6 ++++ src/zarr/core/metadata/v3.py | 14 +++++++++ tests/test_array.py | 2 +- tests/test_group.py | 2 +- 5 files changed, 40 insertions(+), 42 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 2e3911361a..0e450d028a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -5,7 +5,6 @@ from asyncio import gather from collections.abc import Iterable from dataclasses import dataclass, field, replace -from functools import cached_property from itertools import starmap from logging import getLogger from typing import ( @@ -32,7 +31,7 @@ from zarr.codecs._v2 import V2Codec from zarr.codecs.bytes import BytesCodec from zarr.core._info import ArrayInfo -from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArraySpec, parse_array_config +from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, parse_array_config from zarr.core.attributes import Attributes from zarr.core.buffer import ( BufferPrototype, @@ -42,7 +41,7 @@ default_buffer_prototype, ) from zarr.core.buffer.cpu import 
buffer_prototype as cpu_buffer_prototype -from zarr.core.chunk_grids import ChunkGrid, RegularChunkGrid, _auto_partition, normalize_chunks +from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition, normalize_chunks from zarr.core.chunk_key_encodings import ( ChunkKeyEncoding, ChunkKeyEncodingLike, @@ -951,13 +950,6 @@ def chunks(self) -> ChunkCoords: """ return self.metadata.chunks - @cached_property - def chunk_grid(self) -> ChunkGrid: - if self.metadata.zarr_format == 2: - return RegularChunkGrid(chunk_shape=self.chunks) - else: - return self.metadata.chunk_grid - @property def shards(self) -> ChunkCoords | None: """Returns the shard shape of the Array. @@ -1281,20 +1273,6 @@ def nbytes(self) -> int: """ return self.size * self.dtype.itemsize - def get_chunk_spec( - self, _chunk_coords: ChunkCoords, array_config: ArrayConfig, prototype: BufferPrototype - ) -> ArraySpec: - assert isinstance(self.chunk_grid, RegularChunkGrid), ( - "Currently, only regular chunk grid is supported" - ) - return ArraySpec( - shape=self.chunk_grid.chunk_shape, - dtype=self._zdtype, - fill_value=self.metadata.fill_value, - config=array_config, - prototype=prototype, - ) - async def _get_selection( self, indexer: Indexer, @@ -1334,7 +1312,7 @@ async def _get_selection( [ ( self.store_path / self.metadata.encode_chunk_key(chunk_coords), - self.get_chunk_spec(chunk_coords, _config, prototype=prototype), + self.metadata.get_chunk_spec(chunk_coords, _config, prototype=prototype), chunk_selection, out_selection, is_complete_chunk, @@ -1389,7 +1367,7 @@ async def getitem( indexer = BasicIndexer( selection, shape=self.metadata.shape, - chunk_grid=self.chunk_grid, + chunk_grid=self.metadata.chunk_grid, ) return await self._get_selection(indexer, prototype=prototype) @@ -1464,7 +1442,7 @@ async def _set_selection( [ ( self.store_path / self.metadata.encode_chunk_key(chunk_coords), - self.get_chunk_spec(chunk_coords, _config, prototype), + self.metadata.get_chunk_spec(chunk_coords, 
_config, prototype), chunk_selection, out_selection, is_complete_chunk, @@ -1519,7 +1497,7 @@ async def setitem( indexer = BasicIndexer( selection, shape=self.metadata.shape, - chunk_grid=self.chunk_grid, + chunk_grid=self.metadata.chunk_grid, ) return await self._set_selection(indexer, value, prototype=prototype) @@ -1556,8 +1534,8 @@ async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) if delete_outside_chunks: # Remove all chunks outside of the new shape - old_chunk_coords = set(self.chunk_grid.all_chunk_coords(self.metadata.shape)) - new_chunk_coords = set(self.chunk_grid.all_chunk_coords(new_shape)) + old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape)) + new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape)) async def _delete_key(key: str) -> None: await (self.store_path / key).delete() @@ -2687,7 +2665,7 @@ def get_basic_selection( prototype = default_buffer_prototype() return sync( self._async_array._get_selection( - BasicIndexer(selection, self.shape, self._async_array.chunk_grid), + BasicIndexer(selection, self.shape, self.metadata.chunk_grid), out=out, fields=fields, prototype=prototype, @@ -2787,7 +2765,7 @@ def set_basic_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = BasicIndexer(selection, self.shape, self._async_array.chunk_grid) + indexer = BasicIndexer(selection, self.shape, self.metadata.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @_deprecate_positional_args @@ -2908,7 +2886,7 @@ def get_orthogonal_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = OrthogonalIndexer(selection, self.shape, self._async_array.chunk_grid) + indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self._async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype @@ -3021,7 +2999,7 @@ def 
set_orthogonal_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = OrthogonalIndexer(selection, self.shape, self._async_array.chunk_grid) + indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype) ) @@ -3102,7 +3080,7 @@ def get_mask_selection( if prototype is None: prototype = default_buffer_prototype() - indexer = MaskIndexer(mask, self.shape, self._async_array.chunk_grid) + indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) return sync( self._async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype @@ -3185,7 +3163,7 @@ def set_mask_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = MaskIndexer(mask, self.shape, self._async_array.chunk_grid) + indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @_deprecate_positional_args @@ -3266,7 +3244,7 @@ def get_coordinate_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = CoordinateIndexer(selection, self.shape, self._async_array.chunk_grid) + indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) out_array = sync( self._async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype @@ -3352,7 +3330,7 @@ def set_coordinate_selection( if prototype is None: prototype = default_buffer_prototype() # setup indexer - indexer = CoordinateIndexer(selection, self.shape, self._async_array.chunk_grid) + indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid) # handle value - need ndarray-like flatten value if not is_scalar(value, self.dtype): @@ -3468,7 +3446,7 @@ def get_block_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = BlockIndexer(selection, 
self.shape, self._async_array.chunk_grid) + indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid) return sync( self._async_array._get_selection( indexer=indexer, out=out, fields=fields, prototype=prototype @@ -3562,7 +3540,7 @@ def set_block_selection( """ if prototype is None: prototype = default_buffer_prototype() - indexer = BlockIndexer(selection, self.shape, self._async_array.chunk_grid) + indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid) sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype)) @property diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 585771b0b3..6f5d52a972 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -3,11 +3,13 @@ import base64 import warnings from collections.abc import Iterable, Sequence +from functools import cached_property from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict import numcodecs.abc from zarr.abc.metadata import Metadata +from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, TDType_co, TScalar_co, ZDType @@ -103,6 +105,10 @@ def __init__( def ndim(self) -> int: return len(self.shape) + @cached_property + def chunk_grid(self) -> RegularChunkGrid: + return RegularChunkGrid(chunk_shape=self.chunks) + @property def shards(self) -> ChunkCoords | None: return None diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 1c62e4b41c..606d373cba 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -269,6 +269,20 @@ def inner_codecs(self) -> tuple[Codec, ...]: return self.codecs[0].codecs return self.codecs + def get_chunk_spec( + self, _chunk_coords: ChunkCoords, array_config: ArrayConfig, prototype: BufferPrototype + ) -> ArraySpec: + assert isinstance(self.chunk_grid, RegularChunkGrid), ( + "Currently, only regular chunk 
grid is supported" + ) + return ArraySpec( + shape=self.chunk_grid.chunk_shape, + dtype=self.dtype, + fill_value=self.fill_value, + config=array_config, + prototype=prototype, + ) + def encode_chunk_key(self, chunk_coords: ChunkCoords) -> str: return self.chunk_key_encoding.encode_chunk_key(chunk_coords) diff --git a/tests/test_array.py b/tests/test_array.py index 99a5b8a0d7..7d6b877547 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1363,7 +1363,7 @@ async def test_with_data(impl: Literal["sync", "async"], store: Store) -> None: elif impl == "async": arr = await create_array(store, name=name, data=data, zarr_format=3) stored = await arr._get_selection( - BasicIndexer(..., shape=arr.shape, chunk_grid=arr.chunk_grid), + BasicIndexer(..., shape=arr.shape, chunk_grid=arr.metadata.chunk_grid), prototype=default_buffer_prototype(), ) else: diff --git a/tests/test_group.py b/tests/test_group.py index 72f7575b8d..b4dace2568 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -1007,7 +1007,7 @@ async def test_asyncgroup_create_array( assert subnode.dtype == dtype # todo: fix the type annotation of array.metadata.chunk_grid so that we get some autocomplete # here. 
- assert subnode.chunk_grid.chunk_shape == chunk_shape + assert subnode.metadata.chunk_grid.chunk_shape == chunk_shape assert subnode.metadata.zarr_format == zarr_format From be0d2dfb48c2696eabc7e77d5b755ba2d342b9a4 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 22 May 2025 13:23:15 +0200 Subject: [PATCH 101/130] use none to denote default fill value; remove old structured tests; use cast_value where appropriate --- src/zarr/api/synchronous.py | 2 +- src/zarr/core/array.py | 3 +- src/zarr/core/dtype/npy/sized.py | 23 ++++++-- src/zarr/core/dtype/wrapper.py | 6 +-- src/zarr/core/metadata/v2.py | 91 ++++---------------------------- src/zarr/core/metadata/v3.py | 2 +- tests/test_metadata/test_v2.py | 8 ++- tests/test_v2.py | 48 ++--------------- 8 files changed, 41 insertions(+), 142 deletions(-) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 9a9f800881..6cabfed446 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -601,7 +601,7 @@ def create( chunks: ChunkCoords | int | bool | None = None, dtype: npt.DTypeLike | None = None, compressor: CompressorLike = "auto", - fill_value: Any | None = 0, # TODO: need type + fill_value: Any | None = None, # TODO: need type order: MemoryOrder | None = None, store: str | StoreLike | None = None, synchronizer: Any | None = None, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 0e450d028a..e379ee660a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -778,7 +778,8 @@ def _create_metadata_v2( ) -> ArrayV2Metadata: if dimension_separator is None: dimension_separator = "." 
- + if fill_value is None: + fill_value = dtype.default_value() # type: ignore[assignment] return ArrayV2Metadata( shape=shape, dtype=dtype, diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py index 281c634856..7ca507b84e 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/sized.py @@ -79,7 +79,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover def check_value(self, data: object) -> bool: - return isinstance(data, np.bytes_ | str | bytes) + # this is generous for backwards compatibility + return isinstance(data, np.bytes_ | str | bytes | int) def _cast_value_unsafe(self, value: object) -> np.bytes_: return self.to_dtype().type(value) @@ -168,7 +169,11 @@ def check_value(self, data: object) -> bool: return isinstance(data, np.bytes_ | str | bytes | np.void) def _cast_value_unsafe(self, value: object) -> np.void: - return self.to_dtype().type(value) # type: ignore[call-overload, no-any-return] + native_dtype = self.to_dtype() + # Without the second argument, numpy will return a void scalar for dtype V1. + # The second argument ensures that, if native_dtype is something like V10, + # the result will actually be a V10 scalar. + return native_dtype.type(value, native_dtype) @dataclass(frozen=True, kw_only=True) @@ -239,7 +244,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover def check_value(self, data: object) -> bool: - return isinstance(data, str | np.str_ | bytes) + # this is generous for backwards compatibility + return isinstance(data, str | np.str_ | bytes | int) def _cast_value_unsafe(self, value: object) -> np.str_: return self.to_dtype().type(value) @@ -254,8 +260,15 @@ class Structured(ZDType[np.dtypes.VoidDType[int], np.void]): def default_value(self) -> np.void: return self._cast_value_unsafe(0) - def _cast_value_unsafe(self, value: object) -> np.void: - return cast("np.void", np.array([value], dtype=self.to_dtype())[0]) + def _cast_value_unsafe(self, data: object) -> np.void: + na_dtype = self.to_dtype() + if isinstance(data, bytes): + res = np.frombuffer(data, dtype=na_dtype)[0] + elif isinstance(data, list | tuple): + res = np.array([tuple(data)], dtype=na_dtype)[0] + else: + res = np.array([data], dtype=na_dtype)[0] + return cast("np.void", res) @classmethod def check_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 3a56a85788..c8e060e764 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -160,9 +160,9 @@ def cast_value(self, data: object) -> TScalar_co: if self.check_value(data): return self._cast_value_unsafe(data) msg = ( - f"The value {data} failed a type check." - f"It cannot be safely cast to a scalar compatible with {self.dtype_cls}." - f"Consult the documentation for {self} to determine the possible values that can" + f"The value {data} failed a type check. " + f"It cannot be safely cast to a scalar compatible with {self.dtype_cls}. " + f"Consult the documentation for {self} to determine the possible values that can " "be cast to scalars of the wrapped data type." 
) raise TypeError(msg) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 6f5d52a972..23a0275691 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -1,6 +1,5 @@ from __future__ import annotations -import base64 import warnings from collections.abc import Iterable, Sequence from functools import cached_property @@ -52,7 +51,7 @@ class ArrayV2Metadata(Metadata): shape: ChunkCoords chunks: ChunkCoords dtype: ZDType[TBaseDType, TBaseScalar] - fill_value: int | float | str | bytes | None = 0 + fill_value: int | float | str | bytes | None = None order: MemoryOrder = "C" filters: tuple[numcodecs.abc.Codec, ...] | None = None dimension_separator: Literal[".", "/"] = "." @@ -85,7 +84,11 @@ def __init__( order_parsed = parse_indexing_order(order) dimension_separator_parsed = parse_separator(dimension_separator) filters_parsed = parse_filters(filters) - fill_value_parsed = parse_fill_value(fill_value, dtype=dtype.to_dtype()) + fill_value_parsed: TBaseScalar | None + if fill_value is not None: + fill_value_parsed = dtype.cast_value(fill_value) + else: + fill_value_parsed = fill_value attributes_parsed = parse_attributes(attributes) object.__setattr__(self, "shape", shape_parsed) @@ -134,11 +137,10 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: _ = parse_zarr_format(_data.pop("zarr_format")) dtype = get_data_type_from_native_dtype(_data["dtype"]) _data["dtype"] = dtype - if dtype.to_dtype().kind in "SV": - fill_value_encoded = _data.get("fill_value") - if fill_value_encoded is not None: - fill_value = base64.standard_b64decode(fill_value_encoded) - _data["fill_value"] = fill_value + fill_value_encoded = _data.get("fill_value") + if fill_value_encoded is not None: + fill_value = dtype.from_json_value(fill_value_encoded, zarr_format=2) + _data["fill_value"] = fill_value # zarr v2 allowed arbitrary keys here. 
# We don't want the ArrayV2Metadata constructor to fail just because someone put an @@ -281,76 +283,3 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata: ) raise ValueError(msg) return data - - -def _parse_structured_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any: - """Handle structured dtype/fill value pairs""" - try: - if isinstance(fill_value, list): - return np.array([tuple(fill_value)], dtype=dtype)[0] - elif isinstance(fill_value, tuple): - return np.array([fill_value], dtype=dtype)[0] - elif isinstance(fill_value, bytes): - return np.frombuffer(fill_value, dtype=dtype)[0] - elif isinstance(fill_value, str): - decoded = base64.standard_b64decode(fill_value) - return np.frombuffer(decoded, dtype=dtype)[0] - else: - return np.array(fill_value, dtype=dtype)[()] - except Exception as e: - raise ValueError(f"Fill_value {fill_value} is not valid for dtype {dtype}.") from e - - -def parse_fill_value(fill_value: Any, dtype: np.dtype[Any]) -> Any: - """ - Parse a potential fill value into a value that is compatible with the provided dtype. - - Parameters - ---------- - fill_value : Any - A potential fill value. - dtype : np.dtype[Any] - A numpy dtype. - - Returns - ------- - An instance of `dtype`, or `None`, or any python object (in the case of an object dtype) - """ - - if fill_value is None or dtype.hasobject: - pass - elif dtype.fields is not None: - # the dtype is structured (has multiple fields), so the fill_value might be a - # compound value (e.g., a tuple or dict) that needs field-wise processing. - # We use parse_structured_fill_value to correctly convert each component. 
- fill_value = _parse_structured_fill_value(fill_value, dtype) - elif not isinstance(fill_value, np.void) and fill_value == 0: - # this should be compatible across numpy versions for any array type, including - # structured arrays - fill_value = np.zeros((), dtype=dtype)[()] - elif dtype.kind == "U": - # special case unicode because of encoding issues on Windows if passed through numpy - # https://github.com/alimanfoo/zarr/pull/172#issuecomment-343782713 - - if not isinstance(fill_value, str): - raise ValueError( - f"fill_value {fill_value!r} is not valid for dtype {dtype}; must be a unicode string" - ) - elif dtype.kind in "SV" and isinstance(fill_value, str): - fill_value = base64.standard_b64decode(fill_value) - elif dtype.kind == "c" and isinstance(fill_value, list) and len(fill_value) == 2: - complex_val = complex(float(fill_value[0]), float(fill_value[1])) - fill_value = np.array(complex_val, dtype=dtype)[()] - else: - try: - if isinstance(fill_value, bytes) and dtype.kind == "V": - # special case for numpy 1.14 compatibility - fill_value = np.array(fill_value, dtype=dtype.str).view(dtype)[()] - else: - fill_value = np.array(fill_value, dtype=dtype)[()] - - except Exception as e: - msg = f"Fill_value {fill_value} is not valid for dtype {dtype}." 
- raise ValueError(msg) from e - - return fill_value diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 606d373cba..80ed722836 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -175,7 +175,7 @@ def __init__( chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) # Note: relying on a type method is numpy-specific - fill_value_parsed = data_type.to_dtype().type(fill_value) + fill_value_parsed = data_type.cast_value(fill_value) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) storage_transformers_parsed = parse_storage_transformers(storage_transformers) diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index aa8cfc4a31..5fd3ae8cc6 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -128,7 +128,7 @@ async def v2_consolidated_metadata( "chunks": [730], "compressor": None, "dtype": " None: assert "checksum" not in metadata["compressor"] -@pytest.mark.parametrize( - "fill_value", [None, np.void((0, 0), np.dtype([("foo", "i4"), ("bar", "i4")]))] -) +@pytest.mark.parametrize("fill_value", [np.void((0, 0), np.dtype([("foo", "i4"), ("bar", "i4")]))]) def test_structured_dtype_fill_value_serialization(tmp_path, fill_value): zarr_format = 2 group_path = tmp_path / "test.zarr" diff --git a/tests/test_v2.py b/tests/test_v2.py index 145c3d58fb..51139bbeb4 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -15,7 +15,7 @@ from zarr import config from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.metadata.v2 import _parse_structured_fill_value +from zarr.core.dtype.npy.sized import Structured from zarr.core.sync import sync from zarr.storage import MemoryStore, StorePath @@ -261,35 +261,18 @@ def test_structured_dtype_roundtrip(fill_value, tmp_path) -> None: np.dtype([("x", "i4"), 
("y", "i4")]), np.array([(1, 2)], dtype=[("x", "i4"), ("y", "i4")])[0], ), - ( - "BQAAAA==", - np.dtype([("val", "i4")]), - np.array([(5,)], dtype=[("val", "i4")])[0], - ), - ( - {"x": 1, "y": 2}, - np.dtype([("location", "O")]), - np.array([({"x": 1, "y": 2},)], dtype=[("location", "O")])[0], - ), - ( - {"x": 1, "y": 2, "z": 3}, - np.dtype([("location", "O")]), - np.array([({"x": 1, "y": 2, "z": 3},)], dtype=[("location", "O")])[0], - ), ], ids=[ "tuple_input", "list_input", "bytes_input", - "string_input", - "dictionary_input", - "dictionary_input_extra_fields", ], ) def test_parse_structured_fill_value_valid( fill_value: Any, dtype: np.dtype[Any], expected_result: Any ) -> None: - result = _parse_structured_fill_value(fill_value, dtype) + zdtype = Structured.from_dtype(dtype) + result = zdtype.cast_value(fill_value) assert result.dtype == expected_result.dtype assert result == expected_result if isinstance(expected_result, np.void): @@ -297,31 +280,6 @@ def test_parse_structured_fill_value_valid( assert result[name] == expected_result[name] -@pytest.mark.parametrize( - ( - "fill_value", - "dtype", - ), - [ - (("Alice", 30), np.dtype([("name", "U10"), ("age", "i4"), ("city", "U20")])), - (b"\x01\x00\x00\x00", np.dtype([("x", "i4"), ("y", "i4")])), - ("this_is_not_base64", np.dtype([("val", "i4")])), - ("hello", np.dtype([("age", "i4")])), - ({"x": 1, "y": 2}, np.dtype([("location", "i4")])), - ], - ids=[ - "tuple_list_wrong_length", - "bytes_wrong_length", - "invalid_base64", - "wrong_data_type", - "wrong_dictionary", - ], -) -def test_parse_structured_fill_value_invalid(fill_value: Any, dtype: np.dtype[Any]) -> None: - with pytest.raises(ValueError): - _parse_structured_fill_value(fill_value, dtype) - - @pytest.mark.parametrize("fill_value", [None, b"x"], ids=["no_fill", "fill"]) def test_other_dtype_roundtrip(fill_value, tmp_path) -> None: a = np.array([b"a\0\0", b"bb", b"ccc"], dtype="V7") From 8c90d2ca827de0846f9ce65e045b24e5b5682527 Mon Sep 17 00:00:00 2001 
From: Davis Vann Bennett Date: Thu, 22 May 2025 14:51:01 +0200 Subject: [PATCH 102/130] add item size abstraction --- src/zarr/codecs/blosc.py | 9 +++-- src/zarr/codecs/bytes.py | 6 +-- src/zarr/core/array.py | 9 +++-- src/zarr/core/dtype/common.py | 13 +++++++ src/zarr/core/dtype/npy/bool.py | 13 +++++-- src/zarr/core/dtype/npy/complex.py | 22 +++++++---- src/zarr/core/dtype/npy/float.py | 16 +++++++- src/zarr/core/dtype/npy/int.py | 46 +++++++++++++++++++---- src/zarr/core/dtype/npy/sized.py | 45 +++++++++++++++------- src/zarr/core/dtype/npy/string.py | 8 ++-- src/zarr/core/dtype/npy/time.py | 16 +++++--- tests/conftest.py | 7 +++- tests/test_dtype/test_npy/test_bool.py | 1 + tests/test_dtype/test_npy/test_complex.py | 3 ++ tests/test_dtype/test_npy/test_float.py | 21 ++++++++--- tests/test_dtype/test_npy/test_int.py | 9 +++++ tests/test_dtype/test_npy/test_sized.py | 20 ++++++++++ tests/test_dtype/test_npy/test_string.py | 3 ++ tests/test_dtype/test_npy/test_time.py | 2 + tests/test_dtype/test_wrapper.py | 16 +++++++- 20 files changed, 223 insertions(+), 62 deletions(-) diff --git a/src/zarr/codecs/blosc.py b/src/zarr/codecs/blosc.py index fc9b656847..1c5e52e9a4 100644 --- a/src/zarr/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -13,6 +13,7 @@ from zarr.abc.codec import BytesBytesCodec from zarr.core.buffer.cpu import as_numpy_array_wrapper from zarr.core.common import JSON, parse_enum, parse_named_configuration +from zarr.core.dtype.common import HasItemSize from zarr.registry import register_codec if TYPE_CHECKING: @@ -137,14 +138,16 @@ def to_dict(self) -> dict[str, JSON]: } def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: - dtype = array_spec.dtype.to_dtype() + item_size = 1 + if isinstance(array_spec.dtype, HasItemSize): + item_size = array_spec.dtype.item_size new_codec = self if new_codec.typesize is None: - new_codec = replace(new_codec, typesize=dtype.itemsize) + new_codec = replace(new_codec, typesize=item_size) if 
new_codec.shuffle is None: new_codec = replace( new_codec, - shuffle=(BloscShuffle.bitshuffle if dtype.itemsize == 1 else BloscShuffle.shuffle), + shuffle=(BloscShuffle.bitshuffle if item_size == 1 else BloscShuffle.shuffle), ) return new_codec diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index a87df060e7..5db39796e4 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -10,6 +10,7 @@ from zarr.abc.codec import ArrayBytesCodec from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer from zarr.core.common import JSON, parse_enum, parse_named_configuration +from zarr.core.dtype.common import HasEndianness from zarr.core.dtype.npy.common import endianness_to_numpy_str from zarr.registry import register_codec @@ -58,10 +59,7 @@ def to_dict(self) -> dict[str, JSON]: return {"name": "bytes", "configuration": {"endian": self.endian.value}} def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: - # Note: this check is numpy-dtype-specific - # For single-byte (e.g., uint8) or 0-byte (e.g., S0) dtypes, - # endianness does not apply. 
- if array_spec.dtype.to_dtype().itemsize < 2: + if not isinstance(array_spec.dtype, HasEndianness): if self.endian is not None: return replace(self, endian=None) elif self.endian is None: diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index e379ee660a..a4e8c7c3d1 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -72,6 +72,7 @@ ZDTypeLike, parse_data_type, ) +from zarr.core.dtype.common import HasItemSize from zarr.core.indexing import ( BasicIndexer, BasicSelection, @@ -586,11 +587,13 @@ async def _create( if chunks is not None and chunk_shape is not None: raise ValueError("Only one of chunk_shape or chunks can be provided.") - + item_size = 1 + if isinstance(dtype_parsed, HasItemSize): + item_size = dtype_parsed.item_size if chunks: - _chunks = normalize_chunks(chunks, shape, dtype_parsed.to_dtype().itemsize) + _chunks = normalize_chunks(chunks, shape, item_size) else: - _chunks = normalize_chunks(chunk_shape, shape, dtype_parsed.to_dtype().itemsize) + _chunks = normalize_chunks(chunk_shape, shape, item_size) config_parsed = parse_array_config(config) result: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index ecc475192c..d4aded658d 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -30,3 +30,16 @@ class HasEndianness: """ endianness: Endianness | None = "little" + + +@dataclass(frozen=True) +class HasItemSize: + """ + A mix-in class for data types with an item size attribute. + This mix-in bears a property ``item_size``, which denotes the size of each element of the data + type, in bytes. 
+ """ + + @property + def item_size(self) -> int: + raise NotImplementedError diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index c80033c54e..d46758f789 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -4,12 +4,13 @@ import numpy as np from zarr.core.common import JSON, ZarrFormat +from zarr.core.dtype.common import HasItemSize from zarr.core.dtype.npy.common import check_json_bool from zarr.core.dtype.wrapper import TBaseDType, ZDType @dataclass(frozen=True, kw_only=True, slots=True) -class Bool(ZDType[np.dtypes.BoolDType, np.bool_]): +class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): """ Wrapper for numpy boolean dtype. @@ -65,7 +66,7 @@ def default_value(self) -> np.bool_: """ return np.False_ - def to_json_value(self, data: object, zarr_format: ZarrFormat) -> bool: + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> bool: """ Convert a scalar to a python bool. @@ -107,5 +108,9 @@ def check_value(self, data: object) -> bool: # Anything can become a bool return True - def _cast_value_unsafe(self, value: object) -> np.bool_: - return np.bool_(value) + def _cast_value_unsafe(self, data: object) -> np.bool_: + return np.bool_(data) + + @property + def item_size(self) -> int: + return 1 diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index 3e5f640946..ee52dd0577 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -10,7 +10,7 @@ import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import HasEndianness +from zarr.core.dtype.common import HasEndianness, HasItemSize from zarr.core.dtype.npy.common import ( ComplexLike, TComplexDType_co, @@ -31,7 +31,7 @@ @dataclass(frozen=True) -class BaseComplex(ZDType[TComplexDType_co, TComplexScalar_co], HasEndianness): +class BaseComplex(ZDType[TComplexDType_co, TComplexScalar_co], HasEndianness, HasItemSize): # This 
attribute holds the possible zarr v2 JSON names for the data type _zarr_v2_names: ClassVar[tuple[str, ...]] @@ -83,11 +83,11 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: return data == cls._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def check_value(self, value: object) -> bool: - return isinstance(value, ComplexLike) + def check_value(self, data: object) -> bool: + return isinstance(data, ComplexLike) - def _cast_value_unsafe(self, value: object) -> TComplexScalar_co: - return self.to_dtype().type(value) # type: ignore[arg-type, return-value] + def _cast_value_unsafe(self, data: object) -> TComplexScalar_co: + return self.to_dtype().type(data) # type: ignore[arg-type, return-value] def default_value(self) -> TComplexScalar_co: """ @@ -130,7 +130,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TComplexSca ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_json_value(self, data: object, zarr_format: ZarrFormat) -> JSON: + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> JSON: """ Convert an object to a JSON-serializable float. 
@@ -160,9 +160,17 @@ class Complex64(BaseComplex[np.dtypes.Complex64DType, np.complex64]): _zarr_v3_name = "complex64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c8", " int: + return 8 + @dataclass(frozen=True, kw_only=True) class Complex128(BaseComplex[np.dtypes.Complex128DType, np.complex128], HasEndianness): dtype_cls = np.dtypes.Complex128DType _zarr_v3_name = "complex128" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c16", " int: + return 16 diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index e4d6e42ef3..28f3ced63e 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -4,7 +4,7 @@ import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import HasEndianness +from zarr.core.dtype.common import HasEndianness, HasItemSize from zarr.core.dtype.npy.common import ( EndiannessNumpy, FloatLike, @@ -23,7 +23,7 @@ @dataclass(frozen=True) -class BaseFloat(ZDType[TFloatDType_co, TFloatScalar_co], HasEndianness): +class BaseFloat(ZDType[TFloatDType_co, TFloatScalar_co], HasEndianness, HasItemSize): # This attribute holds the possible zarr v2 JSON names for the data type _zarr_v2_names: ClassVar[tuple[str, ...]] @@ -156,6 +156,10 @@ class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): _zarr_v3_name = "float16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f2", " int: + return 2 + @dataclass(frozen=True, kw_only=True) class Float32(BaseFloat[np.dtypes.Float32DType, np.float32]): @@ -163,9 +167,17 @@ class Float32(BaseFloat[np.dtypes.Float32DType, np.float32]): _zarr_v3_name = "float32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f4", " int: + return 4 + @dataclass(frozen=True, kw_only=True) class Float64(BaseFloat[np.dtypes.Float64DType, np.float64]): dtype_cls = np.dtypes.Float64DType _zarr_v3_name = "float64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f8", " int: + return 8 diff --git a/src/zarr/core/dtype/npy/int.py 
b/src/zarr/core/dtype/npy/int.py index 78d9499243..db5869b202 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -4,7 +4,7 @@ import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import HasEndianness +from zarr.core.dtype.common import HasEndianness, HasItemSize from zarr.core.dtype.npy.common import ( EndiannessNumpy, check_json_int, @@ -32,7 +32,7 @@ @dataclass(frozen=True) -class BaseInt(ZDType[TIntDType_co, TIntScalar_co]): +class BaseInt(ZDType[TIntDType_co, TIntScalar_co], HasItemSize): # This attribute holds the possible zarr v2 JSON names for the data type _zarr_v2_names: ClassVar[tuple[str, ...]] @@ -67,11 +67,11 @@ def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: return data == cls._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def check_value(self, value: object) -> TypeGuard[IntLike]: - return isinstance(value, IntLike) + def check_value(self, data: object) -> TypeGuard[IntLike]: + return isinstance(data, IntLike) - def _cast_value_unsafe(self, value: object) -> TIntScalar_co: - return self.to_dtype().type(value) # type: ignore[return-value, arg-type] + def _cast_value_unsafe(self, data: object) -> TIntScalar_co: + return self.to_dtype().type(data) # type: ignore[return-value, arg-type] def default_value(self) -> TIntScalar_co: """ @@ -104,7 +104,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TIntScalar_ return self._cast_value_unsafe(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") - def to_json_value(self, data: object, zarr_format: ZarrFormat) -> int: + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: """ Convert an object to JSON-serializable scalar. 
@@ -140,6 +140,10 @@ def to_dtype(self: Self) -> np.dtypes.Int8DType: def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() + @property + def item_size(self) -> int: + return 1 + @dataclass(frozen=True, kw_only=True) class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): @@ -158,6 +162,10 @@ def to_dtype(self: Self) -> np.dtypes.UInt8DType: def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() + @property + def item_size(self) -> int: + return 1 + @dataclass(frozen=True, kw_only=True) class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): @@ -183,6 +191,10 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + @property + def item_size(self) -> int: + return 2 + @dataclass(frozen=True, kw_only=True) class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): @@ -207,6 +219,10 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + @property + def item_size(self) -> int: + return 2 + @dataclass(frozen=True, kw_only=True) class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): @@ -243,6 +259,10 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + @property + def item_size(self) -> int: + return 4 + @dataclass(frozen=True, kw_only=True) class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): @@ -267,6 +287,10 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + @property + def item_size(self) -> int: + return 4 + @dataclass(frozen=True, kw_only=True) class 
Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): @@ -291,6 +315,10 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + @property + def item_size(self) -> int: + return 8 + @dataclass(frozen=True, kw_only=True) class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): @@ -314,3 +342,7 @@ def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @property + def item_size(self) -> int: + return 8 diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py index 7ca507b84e..2b2ed2ac70 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/sized.py @@ -7,7 +7,7 @@ import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasLength +from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasItemSize, HasLength from zarr.core.dtype.npy.common import ( EndiannessNumpy, bytes_from_json, @@ -20,7 +20,7 @@ @dataclass(frozen=True, kw_only=True) -class FixedLengthAscii(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength): +class FixedLengthAscii(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): dtype_cls = np.dtypes.BytesDType _zarr_v3_name = "numpy.fixed_length_ascii" @@ -85,9 +85,13 @@ def check_value(self, data: object) -> bool: def _cast_value_unsafe(self, value: object) -> np.bytes_: return self.to_dtype().type(value) + @property + def item_size(self) -> int: + return self.length + @dataclass(frozen=True, kw_only=True) -class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength): +class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize): # np.dtypes.VoidDType is 
specified in an odd way in numpy # it cannot be used to create instances of the dtype # so we have to tell mypy to ignore this here @@ -168,25 +172,31 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: def check_value(self, data: object) -> bool: return isinstance(data, np.bytes_ | str | bytes | np.void) - def _cast_value_unsafe(self, value: object) -> np.void: + def _cast_value_unsafe(self, data: object) -> np.void: native_dtype = self.to_dtype() # Without the second argument, numpy will return a void scalar for dtype V1. # The second argument ensures that, if native_dtype is something like V10, # the result will actually be a V10 scalar. - return native_dtype.type(value, native_dtype) + return native_dtype.type(data, native_dtype) + + @property + def item_size(self) -> int: + return self.length @dataclass(frozen=True, kw_only=True) -class FixedLengthUnicode(ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength): +class FixedLengthUnicode( + ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength, HasItemSize +): dtype_cls = np.dtypes.StrDType _zarr_v3_name = "numpy.fixed_length_ucs4" - item_size_bytes: ClassVar[int] = 4 # UCS4 is 4 bytes per code point + code_point_bytes: ClassVar[int] = 4 # UCS4 is 4 bytes per code point @classmethod def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls( - length=dtype.itemsize // (cls.item_size_bytes), + length=dtype.itemsize // (cls.code_point_bytes), endianness=endianness_from_numpy_str(byte_order), ) @@ -220,7 +230,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: elif zarr_format == 3: return { "name": self._zarr_v3_name, - "configuration": {"length_bytes": self.length * self.item_size_bytes}, + "configuration": {"length_bytes": self.length * self.code_point_bytes}, } raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -229,7 +239,7 @@ def _from_json_unsafe(cls, 
data: JSON, zarr_format: ZarrFormat) -> Self: if zarr_format == 2: return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"] // cls.item_size_bytes) # type: ignore[arg-type, index, call-overload, operator] + return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) # type: ignore[arg-type, index, call-overload, operator] raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def default_value(self) -> np.str_: @@ -247,12 +257,16 @@ def check_value(self, data: object) -> bool: # this is generous for backwards compatibility return isinstance(data, str | np.str_ | bytes | int) - def _cast_value_unsafe(self, value: object) -> np.str_: - return self.to_dtype().type(value) + def _cast_value_unsafe(self, data: object) -> np.str_: + return self.to_dtype().type(data) + + @property + def item_size(self) -> int: + return self.length * self.code_point_bytes @dataclass(frozen=True, kw_only=True) -class Structured(ZDType[np.dtypes.VoidDType[int], np.void]): +class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] _zarr_v3_name = "structured" fields: tuple[tuple[str, ZDType[TBaseDType, TBaseScalar]], ...] @@ -395,3 +409,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: dtype = self.to_dtype() return cast("np.void", np.array([as_bytes]).view(dtype)[0]) raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover + + @property + def item_size(self) -> int: + # Lets have numpy do the arithmetic here + return self.to_dtype().itemsize diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index 3849fd05ce..d5a4f9be08 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -72,8 +72,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: def check_value(self, data: object) -> bool: return isinstance(data, str) - def _cast_value_unsafe(self, value: object) -> str: - return str(value) + def _cast_value_unsafe(self, data: object) -> str: + return str(data) else: # Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead. @@ -130,5 +130,5 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: def check_value(self, data: object) -> bool: return isinstance(data, str) - def _cast_value_unsafe(self, value: object) -> str: - return str(value) + def _cast_value_unsafe(self, data: object) -> str: + return str(data) diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index ea44d76b56..61786351f8 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -17,7 +17,7 @@ import numpy as np -from zarr.core.dtype.common import HasEndianness +from zarr.core.dtype.common import HasEndianness, HasItemSize from zarr.core.dtype.npy.common import ( DateTimeUnit, EndiannessNumpy, @@ -99,7 +99,7 @@ class TimeConfig(TypedDict): @dataclass(frozen=True, kw_only=True, slots=True) -class TimeDTypeBase(ZDType[_BaseTimeDType_co, _BaseTimeScalar], HasEndianness): +class TimeDTypeBase(ZDType[_BaseTimeDType_co, _BaseTimeScalar], HasEndianness, HasItemSize): _zarr_v2_names: ClassVar[tuple[str, ...]] # this attribute exists so that we can programmatically create a numpy dtype instance # because the particular numpy dtype we are wrapping does not allow direct construction via @@ 
-163,6 +163,10 @@ def check_value(self, data: object) -> bool: except ValueError: return False + @property + def item_size(self) -> int: + return 8 + @dataclass(frozen=True, kw_only=True, slots=True) class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], HasEndianness): @@ -188,8 +192,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelt return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover - def _cast_value_unsafe(self, value: object) -> np.timedelta64: - return self.to_dtype().type(value) # type: ignore[arg-type] + def _cast_value_unsafe(self, data: object) -> np.timedelta64: + return self.to_dtype().type(data) # type: ignore[arg-type] @classmethod def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: @@ -235,8 +239,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] raise TypeError(f"Invalid type: {data}. 
Expected an integer.") # pragma: no cover - def _cast_value_unsafe(self, value: object) -> np.datetime64: - return self.to_dtype().type(value) # type: ignore[no-any-return, call-overload] + def _cast_value_unsafe(self, data: object) -> np.datetime64: + return self.to_dtype().type(data) # type: ignore[no-any-return, call-overload] @classmethod def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: diff --git a/tests/conftest.py b/tests/conftest.py index 663e2663b8..725de1b529 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,7 @@ from zarr.core.dtype import ( get_data_type_from_native_dtype, ) +from zarr.core.dtype.common import HasItemSize from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.core.metadata.v3 import ArrayV3Metadata from zarr.core.sync import sync @@ -268,12 +269,14 @@ def create_array_metadata( chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format ) - + item_size = 1 + if isinstance(dtype_parsed, HasItemSize): + item_size = dtype_parsed.item_size shard_shape_parsed, chunk_shape_parsed = _auto_partition( array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, - item_size=dtype_parsed.to_dtype().itemsize, + item_size=item_size, ) if order is None: diff --git a/tests/test_dtype/test_npy/test_bool.py b/tests/test_dtype/test_npy/test_bool.py index 086a2cfee8..1adae57f02 100644 --- a/tests/test_dtype/test_npy/test_bool.py +++ b/tests/test_dtype/test_npy/test_bool.py @@ -38,3 +38,4 @@ class TestBool(_TestZDType): (Bool(), np.True_, np.True_), (Bool(), np.False_, np.False_), ) + item_size_params = (Bool(),) diff --git a/tests/test_dtype/test_npy/test_complex.py b/tests/test_dtype/test_npy/test_complex.py index b24bc4d7c8..45a3a1480e 100644 --- a/tests/test_dtype/test_npy/test_complex.py +++ b/tests/test_dtype/test_npy/test_complex.py @@ -52,6 +52,8 @@ class TestComplex64(_BaseTestFloat): (Complex64(), complex(0, math.nan), np.complex64(complex(0, 
math.nan))), ) + item_size_params = (Complex64(),) + class TestComplex128(_BaseTestFloat): test_cls = Complex128 @@ -89,3 +91,4 @@ class TestComplex128(_BaseTestFloat): (Complex128(), complex(-1.0, math.inf), np.complex128(complex(-1.0, math.inf))), (Complex128(), complex(0, math.nan), np.complex128(complex(0, math.nan))), ) + item_size_params = (Complex128(),) diff --git a/tests/test_dtype/test_npy/test_float.py b/tests/test_dtype/test_npy/test_float.py index ba43b6bcf6..daa9bafac0 100644 --- a/tests/test_dtype/test_npy/test_float.py +++ b/tests/test_dtype/test_npy/test_float.py @@ -12,14 +12,16 @@ def scalar_equals(self, scalar1: object, scalar2: object) -> bool: return True return super().scalar_equals(scalar1, scalar2) - hex_nan_params: tuple[str, ...] = () + hex_string_params: tuple[tuple[str, float], ...] = () - def test_hex_nan(self, hex_nan_params: str) -> None: + def test_hex_encoding(self, hex_string_params: tuple[str, float]) -> None: """ Test that hexadecimal strings can be read as NaN values """ + hex_string, expected = hex_string_params zdtype = self.test_cls() - assert np.isnan(zdtype.from_json_value(hex_nan_params, zarr_format=3)) + observed = zdtype.from_json_value(hex_string, zarr_format=3) + assert self.scalar_equals(observed, expected) class TestFloat16(_BaseTestFloat): @@ -61,7 +63,8 @@ class TestFloat16(_BaseTestFloat): (Float16(), "NaN", np.float16("NaN")), ) - hex_nan_params = ("0x7fc0", "0x7fc1") + hex_string_params = (("0x7fc0", np.nan), ("0x7fc1", np.nan), ("0x3c00", 1.0)) + item_size_params = (Float16(),) class TestFloat32(_BaseTestFloat): @@ -105,7 +108,8 @@ class TestFloat32(_BaseTestFloat): (Float32(), "NaN", np.float32("NaN")), ) - hex_nan_params = ("0x7fc00000", "0x7fc00001") + hex_string_params = (("0x7fc00000", np.nan), ("0x7fc00001", np.nan), ("0x3f800000", 1.0)) + item_size_params = (Float32(),) class TestFloat64(_BaseTestFloat): @@ -148,4 +152,9 @@ class TestFloat64(_BaseTestFloat): (Float64(), "NaN", np.float64("NaN")), ) - 
hex_nan_params = ("0x7ff8000000000000", "0x7ff8000000000001") + hex_string_params = ( + ("0x7ff8000000000000", np.nan), + ("0x7ff8000000000001", np.nan), + ("0x3ff0000000000000", 1.0), + ) + item_size_params = (Float64(),) diff --git a/tests/test_dtype/test_npy/test_int.py b/tests/test_dtype/test_npy/test_int.py index 637b594e1b..5b0180af3b 100644 --- a/tests/test_dtype/test_npy/test_int.py +++ b/tests/test_dtype/test_npy/test_int.py @@ -34,6 +34,7 @@ class TestInt8(_TestZDType): (Int8(), 1, np.int8(1)), (Int8(), -1, np.int8(-1)), ) + item_size_params = (Int8(),) class TestInt16(_TestZDType): @@ -65,6 +66,8 @@ class TestInt16(_TestZDType): (Int16(), -1, np.int16(-1)), ) + item_size_params = (Int16(),) + class TestInt32(_TestZDType): test_cls = Int32 @@ -94,6 +97,7 @@ class TestInt32(_TestZDType): (Int32(), 1, np.int32(1)), (Int32(), -1, np.int32(-1)), ) + item_size_params = (Int32(),) class TestInt64(_TestZDType): @@ -124,6 +128,7 @@ class TestInt64(_TestZDType): (Int64(), 1, np.int64(1)), (Int64(), -1, np.int64(-1)), ) + item_size_params = (Int64(),) class TestUInt8(_TestZDType): @@ -154,6 +159,7 @@ class TestUInt8(_TestZDType): (UInt8(), 1, np.uint8(1)), (UInt8(), 0, np.uint8(0)), ) + item_size_params = (UInt8(),) class TestUInt16(_TestZDType): @@ -184,6 +190,7 @@ class TestUInt16(_TestZDType): (UInt16(), 1, np.uint16(1)), (UInt16(), 0, np.uint16(0)), ) + item_size_params = (UInt16(),) class TestUInt32(_TestZDType): @@ -214,6 +221,7 @@ class TestUInt32(_TestZDType): (UInt32(), 1, np.uint32(1)), (UInt32(), 0, np.uint32(0)), ) + item_size_params = (UInt32(),) class TestUInt64(_TestZDType): @@ -244,3 +252,4 @@ class TestUInt64(_TestZDType): (UInt64(), 1, np.uint64(1)), (UInt64(), 0, np.uint64(0)), ) + item_size_params = (UInt64(),) diff --git a/tests/test_dtype/test_npy/test_sized.py b/tests/test_dtype/test_npy/test_sized.py index 2ded5bbb7c..202bb0d04e 100644 --- a/tests/test_dtype/test_npy/test_sized.py +++ b/tests/test_dtype/test_npy/test_sized.py @@ -50,6 +50,11 
@@ class TestFixedLengthAscii(_TestZDType): (FixedLengthAscii(length=2), "ab", np.bytes_("ab")), (FixedLengthAscii(length=4), "abcd", np.bytes_("abcd")), ) + item_size_params = ( + FixedLengthAscii(length=0), + FixedLengthAscii(length=4), + FixedLengthAscii(length=10), + ) class TestFixedLengthBytes(_TestZDType): @@ -91,6 +96,11 @@ class TestFixedLengthBytes(_TestZDType): (FixedLengthBytes(length=2), b"ab", np.void(b"ab")), (FixedLengthBytes(length=4), b"abcd", np.void(b"abcd")), ) + item_size_params = ( + FixedLengthBytes(length=0), + FixedLengthBytes(length=4), + FixedLengthBytes(length=10), + ) class TestFixedLengthUnicode(_TestZDType): @@ -125,6 +135,11 @@ class TestFixedLengthUnicode(_TestZDType): (FixedLengthUnicode(length=2), "hi", np.str_("hi")), (FixedLengthUnicode(length=4), "hihi", np.str_("hihi")), ) + item_size_params = ( + FixedLengthUnicode(length=0), + FixedLengthUnicode(length=4), + FixedLengthUnicode(length=10), + ) class TestStructured(_TestZDType): @@ -214,3 +229,8 @@ def scalar_equals(self, scalar1: Any, scalar2: Any) -> bool: if hasattr(scalar1, "shape") and hasattr(scalar2, "shape"): return np.array_equal(scalar1, scalar2) return super().scalar_equals(scalar1, scalar2) + + item_size_params = ( + Structured(fields=(("field1", Int32()), ("field2", Float64()))), + Structured(fields=(("field1", Int64()), ("field2", Int32()))), + ) diff --git a/tests/test_dtype/test_npy/test_string.py b/tests/test_dtype/test_npy/test_string.py index c87f538be5..1046afcac0 100644 --- a/tests/test_dtype/test_npy/test_string.py +++ b/tests/test_dtype/test_npy/test_string.py @@ -37,6 +37,7 @@ class TestVariableLengthString(_TestZDType): (VariableLengthString(), "", np.str_("")), (VariableLengthString(), "hi", np.str_("hi")), ) + item_size_params = (VariableLengthString(),) else: @@ -70,3 +71,5 @@ class TestVariableLengthString(_TestZDType): # type: ignore[no-redef] (VariableLengthString(), "", np.str_("")), (VariableLengthString(), "hi", np.str_("hi")), ) + + 
item_size_params = (VariableLengthString(),) diff --git a/tests/test_dtype/test_npy/test_time.py b/tests/test_dtype/test_npy/test_time.py index f8f8b5ae47..90c573007f 100644 --- a/tests/test_dtype/test_npy/test_time.py +++ b/tests/test_dtype/test_npy/test_time.py @@ -63,6 +63,7 @@ class TestDateTime64(_TestTimeBase): (DateTime64(unit="s", scale_factor=1), "2005-02-25", np.datetime64("2005-02-25", "s")), (DateTime64(unit="ns", scale_factor=1), "NaT", np.datetime64("NaT")), ) + item_size_params = (DateTime64(unit="ns", scale_factor=1),) class TestTimeDelta64(_TestTimeBase): @@ -102,6 +103,7 @@ class TestTimeDelta64(_TestTimeBase): (TimeDelta64(unit="ns", scale_factor=1), "1", np.timedelta64(1, "ns")), (TimeDelta64(unit="ns", scale_factor=1), "NaT", np.timedelta64("NaT")), ) + item_size_params = (TimeDelta64(unit="ns", scale_factor=1),) def test_time_invalid_unit() -> None: diff --git a/tests/test_dtype/test_wrapper.py b/tests/test_dtype/test_wrapper.py index 608e272690..302a419c0f 100644 --- a/tests/test_dtype/test_wrapper.py +++ b/tests/test_dtype/test_wrapper.py @@ -2,6 +2,10 @@ from typing import TYPE_CHECKING, Any, ClassVar +import pytest + +from zarr.core.dtype.common import HasItemSize + if TYPE_CHECKING: from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @@ -74,8 +78,8 @@ class _TestZDType: scalar_v2_params: ClassVar[tuple[tuple[Any, Any], ...]] = () scalar_v3_params: ClassVar[tuple[tuple[Any, Any], ...]] = () - cast_value_params: ClassVar[tuple[tuple[Any, Any, Any], ...]] + item_size_params: ClassVar[tuple[ZDType[Any, Any], ...]] def json_scalar_equals(self, scalar1: object, scalar2: object) -> bool: # An equality check for json-encoded scalars. 
This defaults to regular equality, @@ -119,3 +123,13 @@ def test_cast_value(self, cast_value_params: tuple[Any, Any, Any]) -> None: zdtype, value, expected = cast_value_params observed = zdtype.cast_value(value) assert self.scalar_equals(expected, observed) + + def test_item_size(self, item_size_params: ZDType[Any, Any]) -> None: + """ + Test that the item_size attribute matches the numpy dtype itemsize attribute, for dtypes + with a fixed scalar size. + """ + if isinstance(item_size_params, HasItemSize): + assert item_size_params.item_size == item_size_params.to_dtype().itemsize + else: + pytest.skip(f"Dtype {item_size_params} does not implement HasItemSize") From 7c58f7ab40990c77b4eea82cac558d1d1ded9621 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 22 May 2025 21:31:09 +0200 Subject: [PATCH 103/130] rename fixed-length string dtypes, and be strict about the numpy object dtype (i.e., refuse to match it) --- src/zarr/api/asynchronous.py | 10 ++-- src/zarr/api/synchronous.py | 2 +- src/zarr/core/dtype/__init__.py | 12 ++--- src/zarr/core/dtype/npy/sized.py | 8 ++-- src/zarr/core/dtype/registry.py | 13 ++++++ src/zarr/core/metadata/dtype.py | 0 tests/conftest.py | 10 ++++ tests/test_array.py | 24 ++++++---- tests/test_dtype/test_npy/test_sized.py | 62 ++++++++++++------------- tests/test_dtype_registry.py | 8 ++-- tests/test_v2.py | 18 +++++-- 11 files changed, 102 insertions(+), 65 deletions(-) delete mode 100644 src/zarr/core/metadata/dtype.py diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 7ecbacd3f6..ad3a81a64d 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -31,7 +31,7 @@ _warn_order_kwarg, _warn_write_empty_chunks_kwarg, ) -from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.dtype import ZDTypeLike, get_data_type_from_native_dtype, parse_data_type from zarr.core.group import ( AsyncGroup, ConsolidatedMetadata, @@ -843,7 +843,7 @@ async def create( shape: 
ChunkCoords | int, *, # Note: this is a change from v2 chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True - dtype: npt.DTypeLike | None = None, + dtype: ZDTypeLike | None = None, compressor: CompressorLike = "auto", fill_value: Any | None = 0, # TODO: need type order: MemoryOrder | None = None, @@ -990,11 +990,11 @@ async def create( _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) or _default_zarr_format() ) - dtype_wrapped = get_data_type_from_native_dtype(dtype) + zdtype = parse_data_type(dtype, zarr_format=zarr_format) if zarr_format == 2: if chunks is None: chunks = shape - default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype_wrapped) + default_filters, default_compressor = _get_default_chunk_encoding_v2(zdtype) if not filters: filters = default_filters # type: ignore[assignment] if compressor == "auto": @@ -1056,7 +1056,7 @@ async def create( store_path, shape=shape, chunks=chunks, - dtype=dtype_wrapped, + dtype=zdtype, compressor=compressor, fill_value=fill_value, overwrite=overwrite, diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index 694e8a3d7a..db5862a0ee 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -599,7 +599,7 @@ def create( shape: ChunkCoords | int, *, # Note: this is a change from v2 chunks: ChunkCoords | int | bool | None = None, - dtype: npt.DTypeLike | None = None, + dtype: ZDTypeLike | None = None, compressor: CompressorLike = "auto", fill_value: Any | None = None, # TODO: need type order: MemoryOrder | None = None, diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index b973691f0f..5d51db92db 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -8,9 +8,9 @@ from zarr.core.dtype.npy.float import Float16, Float32, Float64 from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 from zarr.core.dtype.npy.sized import ( - 
FixedLengthAscii, + FixedLengthASCII, FixedLengthBytes, - FixedLengthUnicode, + FixedLengthUTF32, Structured, ) from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 @@ -36,9 +36,9 @@ "DataTypeRegistry", "DataTypeValidationError", "DateTime64", - "FixedLengthAscii", + "FixedLengthASCII", "FixedLengthBytes", - "FixedLengthUnicode", + "FixedLengthUTF32", "Float16", "Float32", "Float64", @@ -72,8 +72,8 @@ ComplexFloatDType = Complex64 | Complex128 COMPLEX_FLOAT_DTYPE: Final = Complex64, Complex128 -StringDType = FixedLengthUnicode | VariableLengthString | FixedLengthAscii -STRING_DTYPE: Final = FixedLengthUnicode, VariableLengthString, FixedLengthAscii +StringDType = FixedLengthUTF32 | VariableLengthString | FixedLengthASCII +STRING_DTYPE: Final = FixedLengthUTF32, VariableLengthString, FixedLengthASCII TimeDType = DateTime64 | TimeDelta64 TIME_DTYPE: Final = DateTime64, TimeDelta64 diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py index 2b2ed2ac70..bf54638890 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/sized.py @@ -20,7 +20,7 @@ @dataclass(frozen=True, kw_only=True) -class FixedLengthAscii(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): +class FixedLengthASCII(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): dtype_cls = np.dtypes.BytesDType _zarr_v3_name = "numpy.fixed_length_ascii" @@ -185,12 +185,12 @@ def item_size(self) -> int: @dataclass(frozen=True, kw_only=True) -class FixedLengthUnicode( +class FixedLengthUTF32( ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength, HasItemSize ): dtype_cls = np.dtypes.StrDType - _zarr_v3_name = "numpy.fixed_length_ucs4" - code_point_bytes: ClassVar[int] = 4 # UCS4 is 4 bytes per code point + _zarr_v3_name = "numpy.fixed_length_utf32" + code_point_bytes: ClassVar[int] = 4 # utf32 is 4 bytes per code point @classmethod def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: diff --git 
a/src/zarr/core/dtype/registry.py b/src/zarr/core/dtype/registry.py index ae5c3d426e..047f908ac6 100644 --- a/src/zarr/core/dtype/registry.py +++ b/src/zarr/core/dtype/registry.py @@ -3,6 +3,8 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, Self +import numpy as np + from zarr.core.dtype.common import DataTypeValidationError if TYPE_CHECKING: @@ -38,6 +40,17 @@ def get(self, key: str) -> type[ZDType[TBaseDType, TBaseScalar]]: def match_dtype(self, dtype: TBaseDType) -> ZDType[TBaseDType, TBaseScalar]: self.lazy_load() + if dtype == np.dtype("O"): + msg = ( + "Data type resolution failed. " + 'Attempted to resolve a zarr data type from a numpy "Object" data type, which is ' + 'ambiguous, as multiple zarr data types can be represented by the numpy "Object" ' + "data type. " + "In this case you should construct your array by providing a specific Zarr data " + 'type. For a list of Zarr data types that are compatible with the numpy "Object"' + "data type, see xxxxxxxxxxx" + ) + raise ValueError(msg) for val in self.contents.values(): try: return val.from_dtype(dtype) diff --git a/src/zarr/core/metadata/dtype.py b/src/zarr/core/metadata/dtype.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/conftest.py b/tests/conftest.py index 725de1b529..a968016e6f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,6 +39,7 @@ from zarr.core.array import CompressorsLike, FiltersLike, SerializerLike, ShardsLike from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike from zarr.core.common import ChunkCoords, MemoryOrder, ShapeLike, ZarrFormat + from zarr.core.dtype.wrapper import ZDType async def parse_store( @@ -417,3 +418,12 @@ def meta_from_array( chunk_key_encoding=chunk_key_encoding, dimension_names=dimension_names, ) + + +def skip_object_dtype(dtype: ZDType[Any, Any]) -> None: + if dtype.dtype_cls is type(np.dtype("O")): + msg = ( + f"{dtype} uses the numpy object data type, which is not a 
valid target for data " + "type resolution" + ) + pytest.skip(msg) diff --git a/tests/test_array.py b/tests/test_array.py index bea4f30cc6..0cc32c7806 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -18,6 +18,7 @@ import zarr.api.asynchronous import zarr.api.synchronous as sync_api +from tests.conftest import skip_object_dtype from zarr import Array, AsyncArray, Group from zarr.abc.store import Store from zarr.codecs import ( @@ -43,8 +44,8 @@ from zarr.core.dtype import get_data_type_from_native_dtype from zarr.core.dtype.common import Endianness from zarr.core.dtype.npy.common import endianness_from_numpy_str -from zarr.core.dtype.npy.float import Float64 -from zarr.core.dtype.npy.int import Int16 +from zarr.core.dtype.npy.float import Float32, Float64 +from zarr.core.dtype.npy.int import Int16, UInt8 from zarr.core.dtype.npy.sized import ( Structured, ) @@ -1009,9 +1010,11 @@ def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFor """ Test that the same array is produced from a ZDType instance, a numpy dtype, or a numpy string """ + skip_object_dtype(dtype) a = zarr.create_array( store, name="a", shape=(5,), chunks=(5,), dtype=dtype, zarr_format=zarr_format ) + b = zarr.create_array( store, name="b", @@ -1054,12 +1057,13 @@ def test_dtype_roundtrip( """ Test that creating an array, then opening it, gets the same array. 
""" + skip_object_dtype(dtype) a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype, zarr_format=zarr_format) b = zarr.open_array(store) assert a.dtype == b.dtype @staticmethod - @pytest.mark.parametrize("dtype", ["uint8", "float32", "str", "U3", "S4", "V1"]) + @pytest.mark.parametrize("dtype", ["uint8", "float32", "U3", "S4", "V1"]) @pytest.mark.parametrize( "compressors", [ @@ -1244,7 +1248,7 @@ async def test_invalid_v3_arguments( zarr.create(store=store, dtype="uint8", shape=(10,), zarr_format=3, **kwargs) @staticmethod - @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"]) + @pytest.mark.parametrize("dtype", ["uint8", "float32"]) @pytest.mark.parametrize( "compressors", [ @@ -1284,17 +1288,17 @@ async def test_v2_chunk_encoding( assert arr.filters == filters_expected @staticmethod - @pytest.mark.parametrize("dtype_str", ["uint8", "float32", "str"]) + @pytest.mark.parametrize("dtype", [UInt8(), Float32(), VariableLengthString()]) async def test_default_filters_compressors( - store: MemoryStore, dtype_str: str, zarr_format: ZarrFormat + store: MemoryStore, dtype: UInt8 | Float32 | VariableLengthString, zarr_format: ZarrFormat ) -> None: """ Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with ``filters`` and ``compressors`` unspecified. 
""" - zdtype = get_data_type_from_native_dtype(dtype_str) + arr = await create_array( store=store, - dtype=dtype_str, + dtype=dtype, shape=(10,), zarr_format=zarr_format, ) @@ -1306,14 +1310,14 @@ async def test_default_filters_compressors( compressors=sig.parameters["compressors"].default, filters=sig.parameters["filters"].default, serializer=sig.parameters["serializer"].default, - dtype=zdtype, + dtype=dtype, ) elif zarr_format == 2: default_filters, default_compressors = _parse_chunk_encoding_v2( compressor=sig.parameters["compressors"].default, filters=sig.parameters["filters"].default, - dtype=zdtype, + dtype=dtype, ) if default_filters is None: expected_filters = () diff --git a/tests/test_dtype/test_npy/test_sized.py b/tests/test_dtype/test_npy/test_sized.py index 202bb0d04e..8bc83f2f73 100644 --- a/tests/test_dtype/test_npy/test_sized.py +++ b/tests/test_dtype/test_npy/test_sized.py @@ -8,15 +8,15 @@ from zarr.core.dtype.npy.float import Float16, Float64 from zarr.core.dtype.npy.int import Int32, Int64 from zarr.core.dtype.npy.sized import ( - FixedLengthAscii, + FixedLengthASCII, FixedLengthBytes, - FixedLengthUnicode, + FixedLengthUTF32, Structured, ) class TestFixedLengthAscii(_TestZDType): - test_cls = FixedLengthAscii + test_cls = FixedLengthASCII valid_dtype = (np.dtype("|S10"), np.dtype("|S4")) invalid_dtype = ( np.dtype(np.int8), @@ -36,24 +36,24 @@ class TestFixedLengthAscii(_TestZDType): ) scalar_v2_params = ( - (FixedLengthAscii(length=0), ""), - (FixedLengthAscii(length=2), "YWI="), - (FixedLengthAscii(length=4), "YWJjZA=="), + (FixedLengthASCII(length=0), ""), + (FixedLengthASCII(length=2), "YWI="), + (FixedLengthASCII(length=4), "YWJjZA=="), ) scalar_v3_params = ( - (FixedLengthAscii(length=0), ""), - (FixedLengthAscii(length=2), "YWI="), - (FixedLengthAscii(length=4), "YWJjZA=="), + (FixedLengthASCII(length=0), ""), + (FixedLengthASCII(length=2), "YWI="), + (FixedLengthASCII(length=4), "YWJjZA=="), ) cast_value_params = ( - 
(FixedLengthAscii(length=0), "", np.bytes_("")), - (FixedLengthAscii(length=2), "ab", np.bytes_("ab")), - (FixedLengthAscii(length=4), "abcd", np.bytes_("abcd")), + (FixedLengthASCII(length=0), "", np.bytes_("")), + (FixedLengthASCII(length=2), "ab", np.bytes_("ab")), + (FixedLengthASCII(length=4), "abcd", np.bytes_("abcd")), ) item_size_params = ( - FixedLengthAscii(length=0), - FixedLengthAscii(length=4), - FixedLengthAscii(length=10), + FixedLengthASCII(length=0), + FixedLengthASCII(length=4), + FixedLengthASCII(length=10), ) @@ -103,8 +103,8 @@ class TestFixedLengthBytes(_TestZDType): ) -class TestFixedLengthUnicode(_TestZDType): - test_cls = FixedLengthUnicode +class TestFixedLengthUTF32(_TestZDType): + test_cls = FixedLengthUTF32 valid_dtype = (np.dtype(">U10"), np.dtype("U10", " np.bool_: @staticmethod @pytest.mark.parametrize( - ("wrapper_cls", "dtype_str"), [(Bool, "bool"), (FixedLengthUnicode, "|U4")] + ("wrapper_cls", "dtype_str"), [(Bool, "bool"), (FixedLengthUTF32, "|U4")] ) def test_match_dtype( data_type_registry_fixture: DataTypeRegistry, @@ -100,7 +101,7 @@ def test_registered_dtypes( """ Test that the registered dtypes can be retrieved from the registry. 
""" - + skip_object_dtype(zdtype) assert data_type_registry.match_dtype(zdtype.to_dtype()) == zdtype assert ( data_type_registry.match_json( @@ -121,6 +122,7 @@ def test_match_dtype_unique( that excludes the data type class being tested, and ensure that an instance of the wrapped data type fails to match anything in the registry """ + skip_object_dtype(zdtype) for _cls in get_args(AnyDType): if _cls is not type(zdtype): data_type_registry_fixture.register(_cls._zarr_v3_name, _cls) diff --git a/tests/test_v2.py b/tests/test_v2.py index 51139bbeb4..1b21e09952 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -15,7 +15,9 @@ from zarr import config from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.dtype.npy.sized import Structured +from zarr.core.dtype.npy.sized import FixedLengthASCII, FixedLengthUTF32, Structured +from zarr.core.dtype.npy.string import VariableLengthString +from zarr.core.dtype.wrapper import ZDType from zarr.core.sync import sync from zarr.storage import MemoryStore, StorePath @@ -101,10 +103,16 @@ async def test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_js np.testing.assert_equal(data, expected) -@pytest.mark.parametrize(("dtype", "value"), [("|S1", b"Y"), ("|U1", "Y"), (str, "Y")]) -def test_v2_encode_decode_with_data(dtype, value): - dtype, value = dtype, value - expected = np.full((3,), value, dtype=dtype) +@pytest.mark.parametrize( + ("dtype", "value"), + [ + (FixedLengthASCII(length=1), b"Y"), + (FixedLengthUTF32(length=1), "Y"), + (VariableLengthString(), "Y"), + ], +) +def test_v2_encode_decode_with_data(dtype: ZDType[Any, Any], value: str): + expected = np.full((3,), value, dtype=dtype.to_dtype()) a = zarr.create( shape=(3,), zarr_format=2, From 3a21845ca4e962232479944f2de6a4f210b497c6 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 22 May 2025 21:46:41 +0200 Subject: [PATCH 104/130] remove vestigial use of to_dtype().itemsize() --- 
src/zarr/core/array.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index a4e8c7c3d1..cc67c9040f 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -4197,7 +4197,7 @@ async def init_array( from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation - dtype_wrapped = parse_data_type(dtype, zarr_format=zarr_format) + zdtype = parse_data_type(dtype, zarr_format=zarr_format) shape_parsed = parse_shapelike(shape) chunk_key_encoding_parsed = _parse_chunk_key_encoding( chunk_key_encoding, zarr_format=zarr_format @@ -4211,11 +4211,15 @@ async def init_array( else: await ensure_no_existing_node(store_path, zarr_format=zarr_format) + item_size = 1 + if isinstance(zdtype, HasItemSize): + item_size = zdtype.item_size + shard_shape_parsed, chunk_shape_parsed = _auto_partition( array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, - item_size=dtype_wrapped.to_dtype().itemsize, + item_size=item_size, ) chunks_out: tuple[int, ...] 
meta: ArrayV2Metadata | ArrayV3Metadata @@ -4231,7 +4235,7 @@ async def init_array( raise ValueError("Zarr format 2 arrays do not support `serializer`.") filters_parsed, compressor_parsed = _parse_chunk_encoding_v2( - compressor=compressors, filters=filters, dtype=dtype_wrapped + compressor=compressors, filters=filters, dtype=zdtype ) if dimension_names is not None: raise ValueError("Zarr format 2 arrays do not support dimension names.") @@ -4242,7 +4246,7 @@ async def init_array( meta = AsyncArray._create_metadata_v2( shape=shape_parsed, - dtype=dtype_wrapped, + dtype=zdtype, chunks=chunk_shape_parsed, dimension_separator=chunk_key_encoding_parsed.separator, fill_value=fill_value, @@ -4256,7 +4260,7 @@ async def init_array( compressors=compressors, filters=filters, serializer=serializer, - dtype=dtype_wrapped, + dtype=zdtype, ) sub_codecs = cast(tuple[Codec, ...], (*array_array, array_bytes, *bytes_bytes)) codecs_out: tuple[Codec, ...] @@ -4271,7 +4275,7 @@ async def init_array( ) sharding_codec.validate( shape=chunk_shape_parsed, - dtype=dtype_wrapped, + dtype=zdtype, chunk_grid=RegularChunkGrid(chunk_shape=shard_shape_parsed), ) codecs_out = (sharding_codec,) @@ -4287,7 +4291,7 @@ async def init_array( meta = AsyncArray._create_metadata_v3( shape=shape_parsed, - dtype=dtype_wrapped, + dtype=zdtype, fill_value=fill_value, chunk_shape=chunks_out, chunk_key_encoding=chunk_key_encoding_parsed, From ce0afe3379836b24db66520289173821b19c72cd Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 22 May 2025 21:49:12 +0200 Subject: [PATCH 105/130] remove another vestigial use of to_dtype().itemsize() --- src/zarr/core/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index cc67c9040f..d87db52bb4 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -72,7 +72,7 @@ ZDTypeLike, parse_data_type, ) -from zarr.core.dtype.common import HasItemSize +from zarr.core.dtype.common 
import HasEndianness, HasItemSize from zarr.core.indexing import ( BasicIndexer, BasicSelection, @@ -4731,7 +4731,7 @@ def _parse_chunk_encoding_v3( # TODO: ensure that the serializer is compatible with the ndarray produced by the # array-array codecs. For example, if a sequence of array-array codecs produces an # array with a single-byte data type, then the serializer should not specify endiannesss. - if isinstance(out_array_bytes, BytesCodec) and dtype.to_dtype().itemsize == 1: + if isinstance(out_array_bytes, BytesCodec) and not isinstance(dtype, HasEndianness): # The default endianness in the bytescodec might not be None, so we need to replace it out_array_bytes = replace(out_array_bytes, endian=None) return out_array_array, out_array_bytes, out_bytes_bytes From e67d4dcbb6d55931bf4238712c9607a56baff14f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 23 May 2025 10:49:19 +0200 Subject: [PATCH 106/130] emit warning about unstable dtype when serializing Structured dtype to JSON --- src/zarr/core/dtype/common.py | 20 ++++++++++++++++++++ src/zarr/core/dtype/npy/sized.py | 9 ++++++++- src/zarr/core/dtype/wrapper.py | 13 ------------- tests/test_array.py | 6 +++++- tests/test_dtype/conftest.py | 7 ++++++- tests/test_dtype/test_wrapper.py | 1 + tests/test_dtype_registry.py | 2 ++ 7 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index d4aded658d..5eeff2af5b 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from dataclasses import dataclass from typing import Final, Literal @@ -43,3 +44,22 @@ class HasItemSize: @property def item_size(self) -> int: raise NotImplementedError + + +class UnstableSpecificationWarning(FutureWarning): ... 
+ + +def v3_unstable_dtype_warning(dtype: object) -> None: + """ + Emit this warning when a data type does not have a stable zarr v3 spec + """ + msg = ( + f"The data type ({dtype}) does not have a Zarr V3 specification. " + "That means that the representation of data saved with this data type may change without " + "warning in a future version of Zarr Python. " + "Arrays stored with this data type may be unreadable by other Zarr libraries " + "Use this data type at your own risk! " + "Check https://github.com/zarr-developers/zarr-extensions/tree/main/data-types for the " + "status of data type specifications for Zarr V3." + ) + warnings.warn(msg, category=UnstableSpecificationWarning, stacklevel=2) diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py index bf54638890..1014ba6f79 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/sized.py @@ -7,7 +7,13 @@ import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasItemSize, HasLength +from zarr.core.dtype.common import ( + DataTypeValidationError, + HasEndianness, + HasItemSize, + HasLength, + v3_unstable_dtype_warning, +) from zarr.core.dtype.npy.common import ( EndiannessNumpy, bytes_from_json, @@ -325,6 +331,7 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: if zarr_format == 2: return fields elif zarr_format == 3: + v3_unstable_dtype_warning(self) base_dict = {"name": self._zarr_v3_name} base_dict["configuration"] = {"fields": fields} # type: ignore[assignment] return cast("JSON", base_dict) diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index c8e060e764..1a9d9b1e21 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -22,7 +22,6 @@ from __future__ import annotations -import warnings from abc import ABC, abstractmethod from dataclasses import dataclass from typing import TYPE_CHECKING, ClassVar, Generic, Self, 
TypeGuard, TypeVar @@ -336,15 +335,3 @@ def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScal The native scalar value. """ ... - - -def v3_unstable_dtype_warning(dtype: ZDType[TBaseDType, TBaseScalar]) -> None: - msg = ( - f"You are using a data type ({dtype}) that does not have a stable Zarr V3 specification." - "Be advised that arrays stored with this data type may be unreadable by other Zarr " - "libraries, and possibly future versions of Zarr-Python as well. " - "Use this data type at your own risk." - "See https://github.com/zarr-developers/zarr-extensions/tree/main/data-types for a list" - "of data types with a stable Zarr V3 specification." - ) - warnings.warn(msg, category=FutureWarning, stacklevel=2) diff --git a/tests/test_array.py b/tests/test_array.py index 0cc32c7806..db7214f3fc 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -189,6 +189,7 @@ def test_array_name_properties_with_group( assert spam.basename == "spam" +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("store", ["memory"], indirect=True) @pytest.mark.parametrize("specifiy_fill_value", [True, False]) @pytest.mark.parametrize( @@ -199,7 +200,7 @@ def test_array_fill_value_default( ) -> None: """ Test that creating an array with the fill_value parameter set to None, or unspecified, - results in the expected fill_value attribute of the array, i.e. 0 cast to the array's dtype. + results in the expected fill_value attribute of the array, i.e. 
the default value of the dtype """ shape = (10,) if specifiy_fill_value: @@ -994,6 +995,7 @@ def test_chunks_and_shards(store: Store) -> None: @staticmethod @pytest.mark.parametrize("dtype", zdtype_examples) + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None: """ Test that the fill value of an array is set to the default value for the dtype object @@ -1005,6 +1007,7 @@ def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None: assert a.fill_value == dtype.default_value() @staticmethod + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("dtype", zdtype_examples) def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat) -> None: """ @@ -1050,6 +1053,7 @@ def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFor assert a.dtype == c.dtype @staticmethod + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("dtype", zdtype_examples) def test_dtype_roundtrip( dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat diff --git a/tests/test_dtype/conftest.py b/tests/test_dtype/conftest.py index bf58a17556..2b21a57365 100644 --- a/tests/test_dtype/conftest.py +++ b/tests/test_dtype/conftest.py @@ -1,4 +1,5 @@ # Generate a collection of zdtype instances for use in testing. 
+import warnings from typing import Any import numpy as np @@ -13,7 +14,11 @@ for wrapper_cls in data_type_registry.contents.values(): # The Structured dtype has to be constructed with some actual fields if wrapper_cls is Structured: - zdtype_examples += (wrapper_cls.from_dtype(np.dtype([("a", np.float64), ("b", np.int8)])),) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + zdtype_examples += ( + wrapper_cls.from_dtype(np.dtype([("a", np.float64), ("b", np.int8)])), + ) elif issubclass(wrapper_cls, HasLength): zdtype_examples += (wrapper_cls(length=1),) elif issubclass(wrapper_cls, DateTime64 | TimeDelta64): diff --git a/tests/test_dtype/test_wrapper.py b/tests/test_dtype/test_wrapper.py index 302a419c0f..a33e443c76 100644 --- a/tests/test_dtype/test_wrapper.py +++ b/tests/test_dtype/test_wrapper.py @@ -105,6 +105,7 @@ def test_from_json_roundtrip_v2(self, valid_json_v2: Any) -> None: zdtype = self.test_cls.from_json(valid_json_v2, zarr_format=2) assert zdtype.to_json(zarr_format=2) == valid_json_v2 + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_from_json_roundtrip_v3(self, valid_json_v3: Any) -> None: zdtype = self.test_cls.from_json(valid_json_v3, zarr_format=3) assert zdtype.to_json(zarr_format=3) == valid_json_v3 diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index 35c704673d..0c650e5c29 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -94,6 +94,7 @@ def test_unregistered_dtype(data_type_registry_fixture: DataTypeRegistry) -> Non data_type_registry_fixture.get(outside_dtype) @staticmethod + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("zdtype", zdtype_examples) def test_registered_dtypes( zdtype: ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat @@ -111,6 +112,7 @@ def test_registered_dtypes( ) @staticmethod + 
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("zdtype", zdtype_examples) def test_match_dtype_unique( zdtype: ZDType[Any, Any], From 4e2a15783635f0e3b95febd88d5fa75177f8b7c1 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sat, 24 May 2025 14:10:09 +0200 Subject: [PATCH 107/130] put string dtypes in the strings module --- src/zarr/core/dtype/__init__.py | 4 +- src/zarr/core/dtype/npy/sized.py | 158 +---------------------- src/zarr/core/dtype/npy/string.py | 164 +++++++++++++++++++++++- tests/test_dtype/test_npy/test_sized.py | 8 +- tests/test_v2.py | 3 +- 5 files changed, 171 insertions(+), 166 deletions(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 5d51db92db..9c672fd986 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -8,9 +8,7 @@ from zarr.core.dtype.npy.float import Float16, Float32, Float64 from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 from zarr.core.dtype.npy.sized import ( - FixedLengthASCII, FixedLengthBytes, - FixedLengthUTF32, Structured, ) from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 @@ -24,6 +22,8 @@ from zarr.core.common import JSON from zarr.core.dtype.npy.string import ( _NUMPY_SUPPORTS_VLEN_STRING, + FixedLengthASCII, + FixedLengthUTF32, VariableLengthString, ) from zarr.core.dtype.registry import DataTypeRegistry diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py index 1014ba6f79..eb2b39ad9a 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/sized.py @@ -2,100 +2,25 @@ import re from collections.abc import Sequence from dataclasses import dataclass -from typing import Any, ClassVar, Self, TypeGuard, cast +from typing import Any, Self, TypeGuard, cast import numpy as np from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype.common import ( DataTypeValidationError, - 
HasEndianness, HasItemSize, HasLength, v3_unstable_dtype_warning, ) from zarr.core.dtype.npy.common import ( - EndiannessNumpy, bytes_from_json, bytes_to_json, check_json_str, - endianness_from_numpy_str, - endianness_to_numpy_str, ) from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType -@dataclass(frozen=True, kw_only=True) -class FixedLengthASCII(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): - dtype_cls = np.dtypes.BytesDType - _zarr_v3_name = "numpy.fixed_length_ascii" - - @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: - return cls(length=dtype.itemsize) - - def to_dtype(self) -> np.dtypes.BytesDType[int]: - return self.dtype_cls(self.length) - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - """ - Check that the input is a valid JSON representation of a numpy S dtype. - """ - if zarr_format == 2: - # match |S1, |S2, etc - return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None - elif zarr_format == 3: - return ( - isinstance(data, dict) - and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name - and isinstance(data["configuration"], dict) - and "length_bytes" in data["configuration"] - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return { - "name": self._zarr_v3_name, - "configuration": {"length_bytes": self.length}, - } - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"]) # type: ignore[arg-type, index, call-overload] - raise 
ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def default_value(self) -> np.bytes_: - return np.bytes_(b"") - - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: - if check_json_str(data): - return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii"))) - raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover - - def check_value(self, data: object) -> bool: - # this is generous for backwards compatibility - return isinstance(data, np.bytes_ | str | bytes | int) - - def _cast_value_unsafe(self, value: object) -> np.bytes_: - return self.to_dtype().type(value) - - @property - def item_size(self) -> int: - return self.length - - @dataclass(frozen=True, kw_only=True) class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize): # np.dtypes.VoidDType is specified in an odd way in numpy @@ -190,87 +115,6 @@ def item_size(self) -> int: return self.length -@dataclass(frozen=True, kw_only=True) -class FixedLengthUTF32( - ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength, HasItemSize -): - dtype_cls = np.dtypes.StrDType - _zarr_v3_name = "numpy.fixed_length_utf32" - code_point_bytes: ClassVar[int] = 4 # utf32 is 4 bytes per code point - - @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls( - length=dtype.itemsize // (cls.code_point_bytes), - endianness=endianness_from_numpy_str(byte_order), - ) - - def to_dtype(self) -> np.dtypes.StrDType[int]: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls(self.length).newbyteorder(byte_order) - - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - """ - Check that the 
input is a valid JSON representation of a numpy S dtype. - """ - if zarr_format == 2: - # match >U1, <]U\d+$", data) is not None - elif zarr_format == 3: - return ( - isinstance(data, dict) - and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name - and "configuration" in data - and isinstance(data["configuration"], dict) - and set(data["configuration"].keys()) == {"length_bytes"} - and isinstance(data["configuration"]["length_bytes"], int) - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def to_json(self, zarr_format: ZarrFormat) -> JSON: - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return { - "name": self._zarr_v3_name, - "configuration": {"length_bytes": self.length * self.code_point_bytes}, - } - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) # type: ignore[arg-type, index, call-overload, operator] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def default_value(self) -> np.str_: - return np.str_("") - - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return str(data) - - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: - if check_json_str(data): - return self.to_dtype().type(data) - raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover - - def check_value(self, data: object) -> bool: - # this is generous for backwards compatibility - return isinstance(data, str | np.str_ | bytes | int) - - def _cast_value_unsafe(self, data: object) -> np.str_: - return self.to_dtype().type(data) - - @property - def item_size(self) -> int: - return self.length * self.code_point_bytes - - @dataclass(frozen=True, kw_only=True) class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index d5a4f9be08..f65db5a984 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -1,11 +1,19 @@ from __future__ import annotations +import base64 +import re from dataclasses import dataclass -from typing import TYPE_CHECKING, Self, TypeGuard +from typing import TYPE_CHECKING, ClassVar, Self, TypeGuard, cast import numpy as np -from zarr.core.dtype.npy.common import check_json_str +from zarr.core.dtype.common import HasEndianness, HasItemSize, HasLength +from zarr.core.dtype.npy.common import ( + EndiannessNumpy, + check_json_str, + endianness_from_numpy_str, + endianness_to_numpy_str, +) from zarr.core.dtype.wrapper import ZDType if TYPE_CHECKING: @@ -15,6 +23,158 @@ _NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") +@dataclass(frozen=True, kw_only=True) +class FixedLengthASCII(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): + dtype_cls = np.dtypes.BytesDType + _zarr_v3_name = "numpy.fixed_length_ascii" + + @classmethod + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + return cls(length=dtype.itemsize) + + def to_dtype(self) -> np.dtypes.BytesDType[int]: + return self.dtype_cls(self.length) + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy S 
dtype. + """ + if zarr_format == 2: + # match |S1, |S2, etc + return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None + elif zarr_format == 3: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and isinstance(data["configuration"], dict) + and "length_bytes" in data["configuration"] + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return { + "name": self._zarr_v3_name, + "configuration": {"length_bytes": self.length}, + } + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=data["configuration"]["length_bytes"]) # type: ignore[arg-type, index, call-overload] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def default_value(self) -> np.bytes_: + return np.bytes_(b"") + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: + if check_json_str(data): + return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii"))) + raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover + + def check_value(self, data: object) -> bool: + # this is generous for backwards compatibility + return isinstance(data, np.bytes_ | str | bytes | int) + + def _cast_value_unsafe(self, value: object) -> np.bytes_: + return self.to_dtype().type(value) + + @property + def item_size(self) -> int: + return self.length + + +@dataclass(frozen=True, kw_only=True) +class FixedLengthUTF32( + ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength, HasItemSize +): + dtype_cls = np.dtypes.StrDType + _zarr_v3_name = "numpy.fixed_length_utf32" + code_point_bytes: ClassVar[int] = 4 # utf32 is 4 bytes per code point + + @classmethod + def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + byte_order = cast("EndiannessNumpy", dtype.byteorder) + return cls( + length=dtype.itemsize // (cls.code_point_bytes), + endianness=endianness_from_numpy_str(byte_order), + ) + + def to_dtype(self) -> np.dtypes.StrDType[int]: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls(self.length).newbyteorder(byte_order) + + @classmethod + def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + """ + Check that the input is a valid JSON representation of a numpy S dtype. 
+ """ + if zarr_format == 2: + # match >U1, <]U\d+$", data) is not None + elif zarr_format == 3: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and "configuration" in data + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) == {"length_bytes"} + and isinstance(data["configuration"]["length_bytes"], int) + ) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def to_json(self, zarr_format: ZarrFormat) -> JSON: + if zarr_format == 2: + return self.to_dtype().str + elif zarr_format == 3: + return { + "name": self._zarr_v3_name, + "configuration": {"length_bytes": self.length * self.code_point_bytes}, + } + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + if zarr_format == 2: + return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) # type: ignore[arg-type, index, call-overload, operator] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def default_value(self) -> np.str_: + return np.str_("") + + def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + return str(data) + + def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: + if check_json_str(data): + return self.to_dtype().type(data) + raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover + + def check_value(self, data: object) -> bool: + # this is generous for backwards compatibility + return isinstance(data, str | np.str_ | bytes | int) + + def _cast_value_unsafe(self, data: object) -> np.str_: + return self.to_dtype().type(data) + + @property + def item_size(self) -> int: + return self.length * self.code_point_bytes + + if _NUMPY_SUPPORTS_VLEN_STRING: @dataclass(frozen=True, kw_only=True) diff --git a/tests/test_dtype/test_npy/test_sized.py b/tests/test_dtype/test_npy/test_sized.py index 8bc83f2f73..c0e8f137d4 100644 --- a/tests/test_dtype/test_npy/test_sized.py +++ b/tests/test_dtype/test_npy/test_sized.py @@ -5,12 +5,14 @@ import numpy as np from tests.test_dtype.test_wrapper import _TestZDType -from zarr.core.dtype.npy.float import Float16, Float64 -from zarr.core.dtype.npy.int import Int32, Int64 -from zarr.core.dtype.npy.sized import ( +from zarr.core.dtype import ( FixedLengthASCII, FixedLengthBytes, FixedLengthUTF32, + Float16, + Float64, + Int32, + Int64, Structured, ) diff --git a/tests/test_v2.py b/tests/test_v2.py index 1b21e09952..4b041a9b82 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -15,8 +15,7 @@ from zarr import config from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.dtype.npy.sized import FixedLengthASCII, FixedLengthUTF32, Structured -from zarr.core.dtype.npy.string import VariableLengthString +from zarr.core.dtype import FixedLengthASCII, FixedLengthUTF32, Structured, VariableLengthString from zarr.core.dtype.wrapper import ZDType from zarr.core.sync import sync from zarr.storage import MemoryStore, StorePath From 528a942c91de3febd3897e7d7ee21152fcbfed62 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sat, 24 May 2025 22:56:35 +0200 Subject: [PATCH 108/130] make tests isomorphic to source code --- tests/test_dtype/test_npy/test_sized.py | 83 ------------------------ tests/test_dtype/test_npy/test_string.py | 82 
+++++++++++++++++++++++ tests/test_dtype/test_wrapper.py | 2 +- 3 files changed, 83 insertions(+), 84 deletions(-) diff --git a/tests/test_dtype/test_npy/test_sized.py b/tests/test_dtype/test_npy/test_sized.py index c0e8f137d4..eaaa915f59 100644 --- a/tests/test_dtype/test_npy/test_sized.py +++ b/tests/test_dtype/test_npy/test_sized.py @@ -6,9 +6,7 @@ from tests.test_dtype.test_wrapper import _TestZDType from zarr.core.dtype import ( - FixedLengthASCII, FixedLengthBytes, - FixedLengthUTF32, Float16, Float64, Int32, @@ -17,48 +15,6 @@ ) -class TestFixedLengthAscii(_TestZDType): - test_cls = FixedLengthASCII - valid_dtype = (np.dtype("|S10"), np.dtype("|S4")) - invalid_dtype = ( - np.dtype(np.int8), - np.dtype(np.float64), - np.dtype("|U10"), - ) - valid_json_v2 = ("|S0", "|S2", "|S4") - valid_json_v3 = ({"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": 10}},) - invalid_json_v2 = ( - "|S", - "|U10", - "|f8", - ) - invalid_json_v3 = ( - {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": 0}}, - {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": "invalid"}}, - ) - - scalar_v2_params = ( - (FixedLengthASCII(length=0), ""), - (FixedLengthASCII(length=2), "YWI="), - (FixedLengthASCII(length=4), "YWJjZA=="), - ) - scalar_v3_params = ( - (FixedLengthASCII(length=0), ""), - (FixedLengthASCII(length=2), "YWI="), - (FixedLengthASCII(length=4), "YWJjZA=="), - ) - cast_value_params = ( - (FixedLengthASCII(length=0), "", np.bytes_("")), - (FixedLengthASCII(length=2), "ab", np.bytes_("ab")), - (FixedLengthASCII(length=4), "abcd", np.bytes_("abcd")), - ) - item_size_params = ( - FixedLengthASCII(length=0), - FixedLengthASCII(length=4), - FixedLengthASCII(length=10), - ) - - class TestFixedLengthBytes(_TestZDType): test_cls = FixedLengthBytes valid_dtype = (np.dtype("|V10"),) @@ -105,45 +61,6 @@ class TestFixedLengthBytes(_TestZDType): ) -class TestFixedLengthUTF32(_TestZDType): - test_cls = FixedLengthUTF32 - valid_dtype = 
(np.dtype(">U10"), np.dtype("U10", "U10"), np.dtype("U10", " bool: return scalar1 == scalar2 def test_check_dtype_valid(self, valid_dtype: object) -> None: - assert self.test_cls.check_dtype(valid_dtype) # type: ignore[arg-type] + assert self.test_cls.check_dtype(valid_dtype) def test_check_dtype_invalid(self, invalid_dtype: object) -> None: assert not self.test_cls.check_dtype(invalid_dtype) # type: ignore[arg-type] From c9c8181534837fb4d43d19c499d1750ddd6eae17 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 25 May 2025 12:19:20 +0200 Subject: [PATCH 109/130] remove old string logic --- src/zarr/codecs/vlen_utf8.py | 5 +- src/zarr/core/strings.py | 89 ---------------------------------- tests/test_codecs/test_vlen.py | 2 +- tests/test_metadata/test_v3.py | 2 +- tests/test_strings.py | 37 -------------- 5 files changed, 4 insertions(+), 131 deletions(-) delete mode 100644 src/zarr/core/strings.py delete mode 100644 tests/test_strings.py diff --git a/src/zarr/codecs/vlen_utf8.py b/src/zarr/codecs/vlen_utf8.py index 0ef423793d..15bae8da81 100644 --- a/src/zarr/codecs/vlen_utf8.py +++ b/src/zarr/codecs/vlen_utf8.py @@ -10,7 +10,6 @@ from zarr.abc.codec import ArrayBytesCodec from zarr.core.buffer import Buffer, NDBuffer from zarr.core.common import JSON, parse_named_configuration -from zarr.core.strings import cast_to_string_dtype from zarr.registry import register_codec if TYPE_CHECKING: @@ -49,6 +48,7 @@ def to_dict(self) -> dict[str, JSON]: def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: return self + # TODO: expand the tests for this function async def _decode_single( self, chunk_bytes: Buffer, @@ -60,8 +60,7 @@ async def _decode_single( decoded = _vlen_utf8_codec.decode(raw_bytes) assert decoded.dtype == np.object_ decoded.shape = chunk_spec.shape - # coming out of the code, we know this is safe, so don't issue a warning - as_string_dtype = cast_to_string_dtype(decoded, safe=True) + as_string_dtype = 
decoded.astype(chunk_spec.dtype.to_dtype(), copy=False) return chunk_spec.prototype.nd_buffer.from_numpy_array(as_string_dtype) async def _encode_single( diff --git a/src/zarr/core/strings.py b/src/zarr/core/strings.py deleted file mode 100644 index 15c30b6f9b..0000000000 --- a/src/zarr/core/strings.py +++ /dev/null @@ -1,89 +0,0 @@ -"""This module contains utilities for working with string arrays across -different versions of Numpy. -""" - -from __future__ import annotations - -from typing import Any, Union, cast -from warnings import warn - -import numpy as np - -# _STRING_DTYPE is the in-memory datatype that will be used for V3 string arrays -# when reading data back from Zarr. -# Any valid string-like datatype should be fine for *setting* data. - -VLenStringType = Union["np.dtypes.StringDType", "np.dtypes.ObjectDType"] -_VLEN_STRING_DTYPE: VLenStringType -_NUMPY_SUPPORTS_VLEN_STRING: bool - - -def cast_array( - data: np.ndarray[Any, np.dtype[Any]], -) -> np.ndarray[Any, VLenStringType]: - raise NotImplementedError - - -try: - # this new vlen string dtype was added in NumPy 2.0 - _VLEN_STRING_DTYPE = np.dtypes.StringDType() - _NUMPY_SUPPORTS_VLEN_STRING = True - - def cast_array( - data: np.ndarray[Any, np.dtype[Any]], - ) -> np.ndarray[Any, VLenStringType]: - out = data.astype(_VLEN_STRING_DTYPE, copy=False) - return cast(np.ndarray[Any, np.dtypes.StringDType], out) - -except AttributeError: - # if not available, we fall back on an object array of strings, as in Zarr < 3 - _VLEN_STRING_DTYPE = np.dtypes.ObjectDType() - _NUMPY_SUPPORTS_VLEN_STRING = False - - def cast_array( - data: np.ndarray[Any, np.dtype[Any]], - ) -> np.ndarray[Any, VLenStringType]: - out = data.astype(_VLEN_STRING_DTYPE, copy=False) - return cast(np.ndarray[Any, np.dtypes.ObjectDType], out) - - -def cast_to_string_dtype( - data: np.ndarray[Any, np.dtype[Any]], safe: bool = False -) -> np.ndarray[Any, VLenStringType]: - """Take any data and attempt to cast to to our preferred string dtype. 
- - data : np.ndarray - The data to cast - - safe : bool - If True, do not issue a warning if the data is cast from object to string dtype. - - """ - if np.issubdtype(data.dtype, np.str_): - # legacy fixed-width string type (e.g. "= 2.", - stacklevel=2, - ) - return cast_array(data) - raise ValueError(f"Cannot cast dtype {data.dtype} to string dtype") diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py index b1508953ea..9024efa7ed 100644 --- a/tests/test_codecs/test_vlen.py +++ b/tests/test_codecs/test_vlen.py @@ -9,8 +9,8 @@ from zarr.abc.store import Store from zarr.codecs import ZstdCodec from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING from zarr.core.metadata.v3 import ArrayV3Metadata -from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING from zarr.storage import StorePath numpy_str_dtypes: list[type | str | None] = [None, str, "str", np.dtypes.StrDType, "S", "U"] diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index 0d7da0153f..f3bd4510e5 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -12,6 +12,7 @@ from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding from zarr.core.config import config from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING from zarr.core.dtype.npy.time import DateTime64 from zarr.core.group import GroupMetadata, parse_node_type from zarr.core.metadata.v3 import ( @@ -19,7 +20,6 @@ parse_dimension_names, parse_zarr_format, ) -from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING from zarr.errors import MetadataValidationError if TYPE_CHECKING: diff --git a/tests/test_strings.py b/tests/test_strings.py deleted file mode 100644 index 963f2e305e..0000000000 --- a/tests/test_strings.py +++ /dev/null @@ -1,37 +0,0 @@ -"""Tests for the strings module.""" - -import numpy 
as np -import pytest - -from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING, _VLEN_STRING_DTYPE, cast_to_string_dtype - - -def test_string_defaults() -> None: - if _NUMPY_SUPPORTS_VLEN_STRING: - assert _VLEN_STRING_DTYPE == np.dtypes.StringDType() - else: - assert _VLEN_STRING_DTYPE == np.dtypes.ObjectDType() - - -def test_cast_to_string_dtype() -> None: - d1 = np.array(["a", "b", "c"]) - assert d1.dtype == np.dtype(" Date: Mon, 26 May 2025 17:27:41 +0200 Subject: [PATCH 110/130] use scale_factor and unit in cast_value for datetime --- src/zarr/core/dtype/npy/time.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index 61786351f8..1c0e0d715c 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -240,7 +240,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover def _cast_value_unsafe(self, data: object) -> np.datetime64: - return self.to_dtype().type(data) # type: ignore[no-any-return, call-overload] + return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[no-any-return, call-overload] @classmethod def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: From 7806563c681ec53b6c446a99a8680b1c70f5fc98 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 27 May 2025 10:46:10 +0200 Subject: [PATCH 111/130] add regression testing against v2.18 --- tests/test_regression/__init__.py | 0 tests/test_regression/test_regression.py | 125 +++++++++++++++++++++++ tests/test_regression/v2.18.py | 81 +++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 tests/test_regression/__init__.py create mode 100644 tests/test_regression/test_regression.py create mode 100644 tests/test_regression/v2.18.py diff --git a/tests/test_regression/__init__.py b/tests/test_regression/__init__.py 
new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py new file mode 100644 index 0000000000..362e8d75b4 --- /dev/null +++ b/tests/test_regression/test_regression.py @@ -0,0 +1,125 @@ +import subprocess +from dataclasses import asdict, dataclass +from itertools import product +from pathlib import Path + +import numcodecs +import numpy as np +import pytest +from numcodecs import LZ4, LZMA, Blosc, GZip, VLenUTF8, Zstd + +import zarr +from zarr.core.array import Array +from zarr.core.dtype.npy.string import VariableLengthString +from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.storage import LocalStore + + +def runner_installed() -> bool: + try: + subprocess.check_output(["uv", "--version"]) + return True + except FileNotFoundError: + return False + + +def array_metadata_equals(a: ArrayV2Metadata, b: ArrayV2Metadata) -> bool: + dict_a, dict_b = asdict(a), asdict(b) + fill_value_a, fill_value_b = dict_a.pop("fill_value"), dict_b.pop("fill_value") + if ( + isinstance(fill_value_a, float) + and isinstance(fill_value_b, float) + and np.isnan(fill_value_a) + and np.isnan(fill_value_b) + ): + return dict_a == dict_b + else: + return fill_value_a == fill_value_b and dict_a == dict_b + + +@dataclass(kw_only=True) +class ArrayParams: + values: np.ndarray[tuple[int], np.dtype[np.generic]] + fill_value: np.generic | str + compressor: numcodecs.abc.Codec + + +basic_codecs = GZip(), Blosc(), LZ4(), LZMA(), Zstd() +basic_dtypes = "|b", ">i2", ">i4", ">f4", ">f8", "c8", "c16", "M8[10us]", "m8[4ps]" +string_dtypes = ">S1", "U4" + +basic_array_cases = [ + ArrayParams(values=np.arange(4, dtype=dtype), fill_value=1, compressor=codec) + for codec, dtype in product(basic_codecs, basic_dtypes) +] +datetime_array_cases = [ + ArrayParams(values=np.ones((4,), dtype=dtype), fill_value=1, compressor=codec) + for codec, dtype in product(basic_codecs, datetime_dtypes) +] +string_array_cases = [ 
+ ArrayParams( + values=np.array(["aaaa", "bbbb", "ccccc", "dddd"], dtype=dtype), + fill_value="foo", + compressor=codec, + ) + for codec, dtype in product(basic_codecs, string_dtypes) +] +vlen_string_cases = [ + ArrayParams( + values=np.array(["a", "bb", "ccc", "dddd"], dtype="O"), + fill_value="1", + compressor=VLenUTF8(), + ) +] +array_cases = basic_array_cases + datetime_array_cases + string_array_cases + vlen_string_cases + + +@pytest.fixture +def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: + dest = tmp_path / "in" + store = LocalStore(dest) + array_params: ArrayParams = request.param + compressor = array_params.compressor + if array_params.values.dtype == np.dtype("|O"): + dtype = VariableLengthString() + else: + dtype = array_params.values.dtype + z = zarr.create_array( + store, + shape=array_params.values.shape, + dtype=dtype, + chunks=array_params.values.shape, + compressors=compressor, + fill_value=array_params.fill_value, + order="C", + filters=None, + chunk_key_encoding={"name": "v2", "configuration": {"separator": "/"}}, + write_data=True, + zarr_format=2, + ) + z[:] = array_params.values + return z + + +@pytest.mark.skipif(not runner_installed(), reason="no python script runner installed") +@pytest.mark.parametrize( + "source_array", array_cases, indirect=True, ids=tuple(map(str, array_cases)) +) +def test_roundtrip(source_array: Array, tmp_path: Path) -> None: + out_path = tmp_path / "out" + copy_op = subprocess.run( + [ + "uv", + "run", + Path(__file__).resolve().parent / "v2.18.py", + str(source_array.store).removeprefix("file://"), + str(out_path), + ], + capture_output=True, + text=True, + ) + assert copy_op.returncode == 0 + out_array = zarr.open_array(store=out_path, mode="r", zarr_format=2) + assert array_metadata_equals(source_array.metadata, out_array.metadata) + assert np.array_equal(source_array[:], out_array[:]) diff --git a/tests/test_regression/v2.18.py b/tests/test_regression/v2.18.py new file mode 100644 
index 0000000000..39e1c5210c --- /dev/null +++ b/tests/test_regression/v2.18.py @@ -0,0 +1,81 @@ +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "zarr==2.18", +# "numcodecs==0.15" +# ] +# /// + +import argparse + +import zarr +from zarr._storage.store import BaseStore + + +def copy_group( + *, node: zarr.hierarchy.Group, store: zarr.storage.BaseStore, path: str, overwrite: bool +) -> zarr.hierarchy.Group: + result = zarr.group(store=store, path=path, overwrite=overwrite) + result.attrs.put(node.attrs.asdict()) + for key, child in node.items(): + child_path = f"{path}/{key}" + if isinstance(child, zarr.hierarchy.Group): + copy_group(node=child, store=store, path=child_path, overwrite=overwrite) + elif isinstance(child, zarr.core.Array): + copy_array(node=child, store=store, overwrite=overwrite, path=child_path) + return result + + +def copy_array( + *, node: zarr.core.Array, store: BaseStore, path: str, overwrite: bool +) -> zarr.core.Array: + result = zarr.create( + shape=node.shape, + dtype=node.dtype, + fill_value=node.fill_value, + chunks=node.chunks, + compressor=node.compressor, + filters=node.filters, + order=node.order, + dimension_separator=node._dimension_separator, + store=store, + path=path, + overwrite=overwrite, + ) + result.attrs.put(node.attrs.asdict()) + result[:] = node[:] + return result + + +def copy_node( + node: zarr.hierarchy.Group | zarr.core.Array, store: BaseStore, path: str, overwrite: bool +) -> zarr.hierarchy.Group | zarr.core.Array: + if isinstance(node, zarr.hierarchy.Group): + return copy_group(node=node, store=store, path=path, overwrite=overwrite) + elif isinstance(node, zarr.core.Array): + return copy_array(node=node, store=store, path=path, overwrite=overwrite) + else: + raise TypeError(f"Unexpected node type: {type(node)}") # pragma: no cover + + +def cli() -> None: + parser = argparse.ArgumentParser( + description="Copy a zarr hierarchy from one location to another" + ) + parser.add_argument("source", 
type=str, help="Path to the source zarr hierarchy") + parser.add_argument("destination", type=str, help="Path to the destination zarr hierarchy") + args = parser.parse_args() + + src, dst = args.source, args.destination + root_src = zarr.open(src, mode="r") + result = copy_node(node=root_src, store=zarr.NestedDirectoryStore(dst), path="", overwrite=True) + + print(f"successfully created {result} at {dst}") + + +def main() -> None: + cli() + + +if __name__ == "__main__": + main() From 39219fa45b8be28db30dc29947c5d0c33f094df5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 27 May 2025 11:33:05 +0200 Subject: [PATCH 112/130] truncate U and S scalars in _cast_value_unsafe --- src/zarr/core/dtype/npy/string.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index f65db5a984..b5b86ca387 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -86,8 +86,15 @@ def check_value(self, data: object) -> bool: # this is generous for backwards compatibility return isinstance(data, np.bytes_ | str | bytes | int) - def _cast_value_unsafe(self, value: object) -> np.bytes_: - return self.to_dtype().type(value) + def _cast_value_unsafe(self, data: object) -> np.bytes_: + # We explicitly truncate the result because of the following numpy behavior: + # >>> x = np.dtype('S3').type('hello world') + # >>> x + # np.bytes_(b'hello world') + # >>> x.dtype + # dtype('S11') + + return self.to_dtype().type(data[: self.length]) # type: ignore[index] @property def item_size(self) -> int: @@ -168,7 +175,14 @@ def check_value(self, data: object) -> bool: return isinstance(data, str | np.str_ | bytes | int) def _cast_value_unsafe(self, data: object) -> np.str_: - return self.to_dtype().type(data) + # We explicitly truncate the result because of the following numpy behavior: + # >>> x = np.dtype('U3').type('hello world') + # >>> x + # 
np.str_('hello world') + # >>> x.dtype + # dtype('U11') + + return self.to_dtype().type(data[: self.length]) # type: ignore[index] @property def item_size(self) -> int: From 4a7a5502349ba28e0fc1a484a1ab499ca32583f7 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 27 May 2025 11:58:34 +0200 Subject: [PATCH 113/130] docstrings and simplification for regression tests --- tests/test_regression/test_regression.py | 26 +++++++----------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py index 362e8d75b4..688c5ff89d 100644 --- a/tests/test_regression/test_regression.py +++ b/tests/test_regression/test_regression.py @@ -1,5 +1,5 @@ import subprocess -from dataclasses import asdict, dataclass +from dataclasses import dataclass from itertools import product from pathlib import Path @@ -11,36 +11,24 @@ import zarr from zarr.core.array import Array from zarr.core.dtype.npy.string import VariableLengthString -from zarr.core.metadata.v2 import ArrayV2Metadata from zarr.storage import LocalStore def runner_installed() -> bool: + """ + Check if a PEP-723 compliant python script runner is installed. 
+ """ try: subprocess.check_output(["uv", "--version"]) - return True + return True # noqa: TRY300 except FileNotFoundError: return False -def array_metadata_equals(a: ArrayV2Metadata, b: ArrayV2Metadata) -> bool: - dict_a, dict_b = asdict(a), asdict(b) - fill_value_a, fill_value_b = dict_a.pop("fill_value"), dict_b.pop("fill_value") - if ( - isinstance(fill_value_a, float) - and isinstance(fill_value_b, float) - and np.isnan(fill_value_a) - and np.isnan(fill_value_b) - ): - return dict_a == dict_b - else: - return fill_value_a == fill_value_b and dict_a == dict_b - - @dataclass(kw_only=True) class ArrayParams: values: np.ndarray[tuple[int], np.dtype[np.generic]] - fill_value: np.generic | str + fill_value: np.generic | str | int compressor: numcodecs.abc.Codec @@ -121,5 +109,5 @@ def test_roundtrip(source_array: Array, tmp_path: Path) -> None: ) assert copy_op.returncode == 0 out_array = zarr.open_array(store=out_path, mode="r", zarr_format=2) - assert array_metadata_equals(source_array.metadata, out_array.metadata) + assert source_array.metadata.to_dict() == out_array.metadata.to_dict() assert np.array_equal(source_array[:], out_array[:]) From 807c585e9c15615cadd8d781c05422e84abdcff6 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 27 May 2025 12:29:34 +0200 Subject: [PATCH 114/130] changes necessary for linting with regression tests --- pyproject.toml | 3 ++- src/zarr/core/dtype/wrapper.py | 1 + tests/test_dtype/test_wrapper.py | 2 +- tests/test_regression/scripts/__init__.py | 0 tests/test_regression/{ => scripts}/v2.18.py | 0 tests/test_regression/test_regression.py | 20 ++++++++++++++++---- 6 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 tests/test_regression/scripts/__init__.py rename tests/test_regression/{ => scripts}/v2.18.py (100%) diff --git a/pyproject.toml b/pyproject.toml index a43e51abd2..33904334e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -283,6 +283,7 @@ extend-exclude = [ "notebooks", # temporary, until 
we achieve compatibility with ruff ≥ 0.6 "venv", "docs", + "tests/test_regression/scripts/", # these are scripts that use a different version of python "src/zarr/v2/", "tests/v2/", ] @@ -353,7 +354,6 @@ strict = true warn_unreachable = true enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] - [[tool.mypy.overrides]] module = [ "tests.package_with_entrypoint.*", @@ -383,6 +383,7 @@ module = [ "tests.test_properties", "tests.test_sync", "tests.test_v2", + "tests.test_regression.scripts.*" ] ignore_errors = true diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 1a9d9b1e21..bd9686afc1 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -39,6 +39,7 @@ # This is the bound for the dtypes that we support. If we support non-numpy dtypes, # then this bound will need to be widened. TBaseDType = np.dtype[np.generic] + # These two type parameters are covariant because we want # x : ZDType[BaseDType, BaseScalar] = ZDType[SubDType, SubScalar] # to type check diff --git a/tests/test_dtype/test_wrapper.py b/tests/test_dtype/test_wrapper.py index a61fc1a9cd..9a5e3ee56d 100644 --- a/tests/test_dtype/test_wrapper.py +++ b/tests/test_dtype/test_wrapper.py @@ -91,7 +91,7 @@ def scalar_equals(self, scalar1: object, scalar2: object) -> bool: # but some classes may need to override this for special cases return scalar1 == scalar2 - def test_check_dtype_valid(self, valid_dtype: object) -> None: + def test_check_dtype_valid(self, valid_dtype: TBaseDType) -> None: assert self.test_cls.check_dtype(valid_dtype) def test_check_dtype_invalid(self, invalid_dtype: object) -> None: diff --git a/tests/test_regression/scripts/__init__.py b/tests/test_regression/scripts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_regression/v2.18.py b/tests/test_regression/scripts/v2.18.py similarity index 100% rename from tests/test_regression/v2.18.py rename to 
tests/test_regression/scripts/v2.18.py diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py index 688c5ff89d..61ff8ebfa9 100644 --- a/tests/test_regression/test_regression.py +++ b/tests/test_regression/test_regression.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from itertools import product from pathlib import Path +from typing import TYPE_CHECKING import numcodecs import numpy as np @@ -10,9 +11,13 @@ import zarr from zarr.core.array import Array +from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding from zarr.core.dtype.npy.string import VariableLengthString from zarr.storage import LocalStore +if TYPE_CHECKING: + from zarr.core.dtype import ZDTypeLike + def runner_installed() -> bool: """ @@ -69,8 +74,10 @@ def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: store = LocalStore(dest) array_params: ArrayParams = request.param compressor = array_params.compressor + chunk_key_encoding = V2ChunkKeyEncoding(separator="/") + dtype: ZDTypeLike if array_params.values.dtype == np.dtype("|O"): - dtype = VariableLengthString() + dtype = VariableLengthString() # type: ignore[assignment] else: dtype = array_params.values.dtype z = zarr.create_array( @@ -82,7 +89,7 @@ def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: fill_value=array_params.fill_value, order="C", filters=None, - chunk_key_encoding={"name": "v2", "configuration": {"separator": "/"}}, + chunk_key_encoding=chunk_key_encoding, write_data=True, zarr_format=2, ) @@ -90,17 +97,22 @@ def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: return z +# TODO: make this dynamic based on the installed scripts +script_paths = [Path(__file__).resolve().parent / "scripts" / "v2.18.py"] + + @pytest.mark.skipif(not runner_installed(), reason="no python script runner installed") @pytest.mark.parametrize( "source_array", array_cases, indirect=True, ids=tuple(map(str, array_cases)) ) -def 
test_roundtrip(source_array: Array, tmp_path: Path) -> None: +@pytest.mark.parametrize("script_path", script_paths) +def test_roundtrip(source_array: Array, tmp_path: Path, script_path: Path) -> None: out_path = tmp_path / "out" copy_op = subprocess.run( [ "uv", "run", - Path(__file__).resolve().parent / "v2.18.py", + script_path, str(source_array.store).removeprefix("file://"), str(out_path), ], From 5150d607c7ec17d428b24aaa0596927b500704af Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 29 May 2025 12:40:03 +0200 Subject: [PATCH 115/130] improve method names, refactor type hints with typeddictionaries, fix registry load frequency, add object_codec_id for v2 json deserialization --- docs/user-guide/arrays.rst | 4 +- docs/user-guide/data_types.rst | 10 +- src/zarr/codecs/_v2.py | 6 +- src/zarr/codecs/bytes.py | 2 +- src/zarr/codecs/sharding.py | 4 +- src/zarr/codecs/vlen_utf8.py | 2 +- src/zarr/core/array.py | 6 +- src/zarr/core/codec_pipeline.py | 8 +- src/zarr/core/common.py | 10 + src/zarr/core/dtype/__init__.py | 52 +-- src/zarr/core/dtype/common.py | 24 +- src/zarr/core/dtype/npy/bool.py | 50 +-- src/zarr/core/dtype/npy/common.py | 5 +- src/zarr/core/dtype/npy/complex.py | 53 ++-- src/zarr/core/dtype/npy/float.py | 48 +-- src/zarr/core/dtype/npy/int.py | 368 +++++++++++++++++----- src/zarr/core/dtype/npy/sized.py | 220 +++++++------ src/zarr/core/dtype/npy/string.py | 258 +++++++++------ src/zarr/core/dtype/npy/time.py | 174 +++++----- src/zarr/core/dtype/registry.py | 49 ++- src/zarr/core/dtype/wrapper.py | 151 ++++++--- src/zarr/core/metadata/v2.py | 33 +- src/zarr/core/metadata/v3.py | 10 +- tests/package_with_entrypoint/__init__.py | 12 +- tests/test_array.py | 22 +- tests/test_dtype/conftest.py | 2 +- tests/test_dtype/test_npy/test_bool.py | 6 +- tests/test_dtype/test_npy/test_common.py | 36 ++- tests/test_dtype/test_npy/test_complex.py | 8 +- tests/test_dtype/test_npy/test_float.py | 18 +- tests/test_dtype/test_npy/test_int.py | 34 +- 
tests/test_dtype/test_npy/test_sized.py | 18 +- tests/test_dtype/test_npy/test_string.py | 36 ++- tests/test_dtype/test_npy/test_time.py | 18 +- tests/test_dtype/test_wrapper.py | 48 +-- tests/test_dtype_registry.py | 47 ++- tests/test_group.py | 7 +- tests/test_metadata/test_consolidated.py | 2 +- tests/test_metadata/test_v2.py | 2 +- tests/test_metadata/test_v3.py | 14 +- tests/test_properties.py | 2 +- tests/test_regression/test_regression.py | 6 +- tests/test_v2.py | 10 +- 43 files changed, 1210 insertions(+), 685 deletions(-) diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst index c27f1296b9..13190a4689 100644 --- a/docs/user-guide/arrays.rst +++ b/docs/user-guide/arrays.rst @@ -211,8 +211,8 @@ prints additional diagnostics, e.g.:: Serializer : BytesCodec(endian=) Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) No. bytes : 400000000 (381.5M) - No. bytes stored : 3558573 - Storage ratio : 112.4 + No. bytes stored : 9696520 + Storage ratio : 41.3 Chunks Initialized : 100 .. note:: diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index a4d8314a5e..c101ae50fc 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -128,20 +128,20 @@ Create a ``ZDType`` from a native data type: >>> from zarr.core.dtype import Int8 >>> import numpy as np - >>> int8 = Int8.from_dtype(np.dtype('int8')) + >>> int8 = Int8.from_native_dtype(np.dtype('int8')) Convert back to native data type: .. code-block:: python - >>> native_dtype = int8.to_dtype() + >>> native_dtype = int8.to_native_dtype() >>> assert native_dtype == np.dtype('int8') Get the default scalar value for the data type: .. code-block:: python - >>> default_value = int8.default_value() + >>> default_value = int8.default_scalar() >>> assert default_value == np.int8(0) @@ -160,7 +160,7 @@ Serialize a scalar value to JSON: .. 
code-block:: python - >>> json_value = int8.to_json_value(42, zarr_format=3) + >>> json_value = int8.to_json_scalar(42, zarr_format=3) >>> json_value 42 @@ -168,5 +168,5 @@ Deserialize a scalar value from JSON: .. code-block:: python - >>> scalar_value = int8.from_json_value(42, zarr_format=3) + >>> scalar_value = int8.from_json_scalar(42, zarr_format=3) >>> assert scalar_value == np.int8(42) diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py index c03e3c55fb..08853f27f1 100644 --- a/src/zarr/codecs/_v2.py +++ b/src/zarr/codecs/_v2.py @@ -48,7 +48,7 @@ async def _decode_single( # segfaults and other bad things happening if chunk_spec.dtype.dtype_cls is not np.dtypes.ObjectDType: try: - chunk = chunk.view(chunk_spec.dtype.to_dtype()) + chunk = chunk.view(chunk_spec.dtype.to_native_dtype()) except TypeError: # this will happen if the dtype of the chunk # does not match the dtype of the array spec i.g. if @@ -56,7 +56,7 @@ async def _decode_single( # is an object array. In this case, we need to convert the object # array to the correct dtype. - chunk = np.array(chunk).astype(chunk_spec.dtype.to_dtype()) + chunk = np.array(chunk).astype(chunk_spec.dtype.to_native_dtype()) elif chunk.dtype != object: # If we end up here, someone must have hacked around with the filters. 
@@ -80,7 +80,7 @@ async def _encode_single( chunk = chunk_array.as_ndarray_like() # ensure contiguous and correct order - chunk = chunk.astype(chunk_spec.dtype.to_dtype(), order=chunk_spec.order, copy=False) + chunk = chunk.astype(chunk_spec.dtype.to_native_dtype(), order=chunk_spec.order, copy=False) # apply filters if self.filters: diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 5db39796e4..6ef0fef60b 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -79,7 +79,7 @@ async def _decode_single( "Endianness | None", self.endian.value if self.endian is not None else None ) new_byte_order = endianness_to_numpy_str(endian_str) - dtype = chunk_spec.dtype.to_dtype().newbyteorder(new_byte_order) + dtype = chunk_spec.dtype.to_native_dtype().newbyteorder(new_byte_order) as_array_like = chunk_bytes.as_array_like() if isinstance(as_array_like, NDArrayLike): diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 914236d700..cd8676b4d1 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -452,7 +452,7 @@ async def _decode_single( # setup output array out = chunk_spec.prototype.nd_buffer.create( shape=shard_shape, - dtype=shard_spec.dtype.to_dtype(), + dtype=shard_spec.dtype.to_native_dtype(), order=shard_spec.order, fill_value=0, ) @@ -499,7 +499,7 @@ async def _decode_partial_single( # setup output array out = shard_spec.prototype.nd_buffer.create( shape=indexer.shape, - dtype=shard_spec.dtype.to_dtype(), + dtype=shard_spec.dtype.to_native_dtype(), order=shard_spec.order, fill_value=0, ) diff --git a/src/zarr/codecs/vlen_utf8.py b/src/zarr/codecs/vlen_utf8.py index 15bae8da81..b7c0418b2e 100644 --- a/src/zarr/codecs/vlen_utf8.py +++ b/src/zarr/codecs/vlen_utf8.py @@ -60,7 +60,7 @@ async def _decode_single( decoded = _vlen_utf8_codec.decode(raw_bytes) assert decoded.dtype == np.object_ decoded.shape = chunk_spec.shape - as_string_dtype = decoded.astype(chunk_spec.dtype.to_dtype(), 
copy=False) + as_string_dtype = decoded.astype(chunk_spec.dtype.to_native_dtype(), copy=False) return chunk_spec.prototype.nd_buffer.from_numpy_array(as_string_dtype) async def _encode_single( diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index e3d9e3fdaf..cd6b33a28c 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -700,7 +700,7 @@ def _create_metadata_v3( if fill_value is None: # v3 spec will not allow a null fill value - fill_value_parsed = dtype.default_value() + fill_value_parsed = dtype.default_scalar() else: fill_value_parsed = fill_value @@ -782,7 +782,7 @@ def _create_metadata_v2( if dimension_separator is None: dimension_separator = "." if fill_value is None: - fill_value = dtype.default_value() # type: ignore[assignment] + fill_value = dtype.default_scalar() # type: ignore[assignment] return ArrayV2Metadata( shape=shape, dtype=dtype, @@ -1056,7 +1056,7 @@ def dtype(self) -> TBaseDType: np.dtype Data type of the array """ - return self._zdtype.to_dtype() + return self._zdtype.to_native_dtype() @property def order(self) -> MemoryOrder: diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py index 3d00fe5467..23c27e40c6 100644 --- a/src/zarr/core/codec_pipeline.py +++ b/src/zarr/core/codec_pipeline.py @@ -62,7 +62,7 @@ def fill_value_or_default(chunk_spec: ArraySpec) -> Any: # validated when decoding the metadata, but we support reading # Zarr V2 data and need to support the case where fill_value # is None. 
- return chunk_spec.dtype.default_value() + return chunk_spec.dtype.default_scalar() else: return fill_value @@ -296,7 +296,9 @@ def _merge_chunk_array( is_complete_chunk: bool, drop_axes: tuple[int, ...], ) -> NDBuffer: - if chunk_selection == () or is_scalar(value.as_ndarray_like(), chunk_spec.dtype.to_dtype()): + if chunk_selection == () or is_scalar( + value.as_ndarray_like(), chunk_spec.dtype.to_native_dtype() + ): chunk_value = value else: chunk_value = value[out_selection] @@ -317,7 +319,7 @@ def _merge_chunk_array( if existing_chunk_array is None: chunk_array = chunk_spec.prototype.nd_buffer.create( shape=chunk_spec.shape, - dtype=chunk_spec.dtype.to_dtype(), + dtype=chunk_spec.dtype.to_native_dtype(), order=chunk_spec.order, fill_value=fill_value_or_default(chunk_spec), ) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index 864959a948..2ba5914ea5 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -10,7 +10,9 @@ from typing import ( TYPE_CHECKING, Any, + Generic, Literal, + TypedDict, TypeVar, cast, overload, @@ -39,6 +41,14 @@ AccessModeLiteral = Literal["r", "r+", "a", "w", "w-"] DimensionNames = Iterable[str | None] | None +TName = TypeVar("TName", bound=str) +TConfig = TypeVar("TConfig", bound=Mapping[str, object]) + + +class NamedConfig(TypedDict, Generic[TName, TConfig]): + name: TName + configuration: TConfig + def product(tup: ChunkCoords) -> int: return functools.reduce(operator.mul, tup, 1) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 9c672fd986..a8bfe2b5c4 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -16,12 +16,13 @@ if TYPE_CHECKING: from zarr.core.common import ZarrFormat +from collections.abc import Mapping + import numpy as np import numpy.typing as npt from zarr.core.common import JSON from zarr.core.dtype.npy.string import ( - _NUMPY_SUPPORTS_VLEN_STRING, FixedLengthASCII, FixedLengthUTF32, VariableLengthString, @@ -102,7 
+103,7 @@ ) # This type models inputs that can be coerced to a ZDType -ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | dict[str, JSON] | str +ZDTypeLike: TypeAlias = npt.DTypeLike | ZDType[TBaseDType, TBaseScalar] | Mapping[str, JSON] | str for dtype in ANY_DTYPE: # mypy does not know that all the elements of ANY_DTYPE are subclasses of ZDType @@ -114,42 +115,41 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[TBaseDType, """ Get a data type wrapper (an instance of ``ZDType``) from a native data type, e.g. a numpy dtype. """ - data_type_registry.lazy_load() if not isinstance(dtype, np.dtype): - # TODO: This check has a lot of assumptions in it! Chiefly, we assume that the - # numpy object dtype contains variable length strings, which is not in general true - # When / if zarr python supports ragged arrays, for example, this check will fail! - if dtype in (str, "str", "|T16", "O", "|O", np.dtypes.ObjectDType()): - if _NUMPY_SUPPORTS_VLEN_STRING: - na_dtype = np.dtype("T") - else: - na_dtype = np.dtype("O") - elif isinstance(dtype, list): + na_dtype: np.dtype[np.generic] + if isinstance(dtype, list): # this is a valid _VoidDTypeLike check na_dtype = np.dtype([tuple(d) for d in dtype]) else: na_dtype = np.dtype(dtype) else: na_dtype = dtype - return data_type_registry.match_dtype(na_dtype) + return data_type_registry.match_dtype(dtype=na_dtype) + + +def get_data_type_from_json_v3( + dtype_spec: JSON, +) -> ZDType[TBaseDType, TBaseScalar]: + return data_type_registry.match_json_v3(dtype_spec) -def get_data_type_from_json( - dtype: JSON, zarr_format: ZarrFormat +def get_data_type_from_json_v2( + dtype_spec: JSON, *, object_codec_id: str | None = None ) -> ZDType[TBaseDType, TBaseScalar]: - return data_type_registry.match_json(dtype, zarr_format=zarr_format) + return data_type_registry.match_json_v2(dtype_spec, object_codec_id=object_codec_id) -def parse_data_type(dtype: ZDTypeLike, zarr_format: ZarrFormat) -> 
ZDType[TBaseDType, TBaseScalar]: +def parse_data_type( + dtype_spec: ZDTypeLike, *, zarr_format: ZarrFormat, object_codec_id: str | None = None +) -> ZDType[TBaseDType, TBaseScalar]: """ Interpret the input as a ZDType instance. """ - if isinstance(dtype, ZDType): - return dtype - elif isinstance(dtype, dict): - # This branch assumes that the data type has been specified in the JSON form - # but it's also possible for numpy data types to be specified as dictionaries, which will - # cause an error in the `get_data_type_from_json`, but that's ok for now - return get_data_type_from_json(dtype, zarr_format=zarr_format) # type: ignore[arg-type] - else: - return get_data_type_from_native_dtype(dtype) + if isinstance(dtype_spec, ZDType): + return dtype_spec + # dict and zarr_format 3 means that we have a JSON object representation of the dtype + if zarr_format == 3 and isinstance(dtype_spec, Mapping): + return get_data_type_from_json_v3(dtype_spec) # type: ignore[arg-type] + # otherwise, we have either a numpy dtype string, or a zarr v3 dtype string, and in either case + # we can create a numpy dtype from it, and do the dtype inference from that + return get_data_type_from_native_dtype(dtype_spec) # type: ignore[arg-type] diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 5eeff2af5b..bbdc06c50d 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -2,7 +2,7 @@ import warnings from dataclasses import dataclass -from typing import Final, Literal +from typing import ClassVar, Final, Literal Endianness = Literal["little", "big"] SpecialFloatStrings = Literal["NaN", "Infinity", "-Infinity"] @@ -46,6 +46,28 @@ def item_size(self) -> int: raise NotImplementedError +@dataclass(frozen=True) +class HasObjectCodec: + """ + A mix-in class for data types that require an object codec id. 
+ This class bears the property ``object_codec_id``, which is the string name of an object + codec that is required to encode and decode the data type. + + In zarr-python 2.x certain data types like variable-length strings or variable-length arrays + used the catch-all numpy "object" data type for their in-memory representation. But these data + types cannot be stored as numpy object data types, because the object data type does not define + a fixed memory layout. So these data types required a special codec, called an "object codec", + that effectively defined a compact representation for the data type, which was used to encode + and decode the data type. + + Zarr-python 2.x would not allow the creation of arrays with the "object" data type if an object + codec was not specified, and thus the name of the object codec is effectively part of the data + type model. + """ + + object_codec_id: ClassVar[str] + + class UnstableSpecificationWarning(FutureWarning): ... diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index d46758f789..b1800127e8 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -1,12 +1,12 @@ from dataclasses import dataclass -from typing import ClassVar, Literal, Self, TypeGuard +from typing import ClassVar, Literal, Self, TypeGuard, overload import numpy as np from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype.common import HasItemSize from zarr.core.dtype.npy.common import check_json_bool -from zarr.core.dtype.wrapper import TBaseDType, ZDType +from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType @dataclass(frozen=True, kw_only=True, slots=True) @@ -22,40 +22,50 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): The numpy dtype class. 
""" - _zarr_v3_name = "bool" + _zarr_v3_name: ClassVar[Literal["bool"]] = "bool" _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|b1",) dtype_cls = np.dtypes.BoolDType @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() - def to_dtype(self: Self) -> np.dtypes.BoolDType: + def to_native_dtype(self: Self) -> np.dtypes.BoolDType: return self.dtype_cls() @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[Literal["bool", "|b1"]]: + def check_json_v2( + cls, data: JSON, *, object_codec_id: str | None = None + ) -> TypeGuard[Literal["|b1"]]: """ Check that the input is a valid JSON representation of a bool. """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + return data in cls._zarr_v2_names + + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["bool"]]: + return data == cls._zarr_v3_name + + @overload + def to_json(self, zarr_format: Literal[2]) -> Literal["|b1"]: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> Literal["bool"]: ... - def to_json(self, zarr_format: ZarrFormat) -> str: + def to_json(self, zarr_format: ZarrFormat) -> Literal["|b1", "bool"]: if zarr_format == 2: - return self.to_dtype().str + return self.to_native_dtype().str elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: return cls() - def default_value(self) -> np.bool_: + def default_scalar(self) -> np.bool_: """ Get the default value for the boolean dtype. 
@@ -66,7 +76,7 @@ def default_value(self) -> np.bool_: """ return np.False_ - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> bool: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> bool: """ Convert a scalar to a python bool. @@ -84,7 +94,7 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> bool: """ return bool(data) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: """ Read a JSON-serializable value as a numpy boolean scalar. @@ -101,14 +111,14 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: The numpy boolean scalar. """ if check_json_bool(data): - return self._cast_value_unsafe(data) + return self._cast_scalar_unchecked(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") # pragma: no cover - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: # Anything can become a bool return True - def _cast_value_unsafe(self, data: object) -> np.bool_: + def _cast_scalar_unchecked(self, data: object) -> np.bool_: return np.bool_(data) @property diff --git a/src/zarr/core/dtype/npy/common.py b/src/zarr/core/dtype/npy/common.py index 2481dcb150..03dc194a7a 100644 --- a/src/zarr/core/dtype/npy/common.py +++ b/src/zarr/core/dtype/npy/common.py @@ -176,7 +176,10 @@ def float_from_json_v3(data: JSONFloatV3) -> float: elif len(data[2:]) == 16: dtype_code = ">d" else: - msg = f"Invalid float value: {data!r}. Expected a string of length 4, 8, or 16." + msg = ( + f"Invalid hexadecimal float value: {data!r}. 
" + "Expected the '0x' prefix to be followed by 4, 8, or 16 numeral characters" + ) raise ValueError(msg) return float(struct.unpack(dtype_code, bytes.fromhex(data[2:]))[0]) return float_from_json_v2(data) diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index ee52dd0577..f7db6fe94d 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -2,6 +2,7 @@ from typing import ( TYPE_CHECKING, ClassVar, + Literal, Self, TypeGuard, cast, @@ -24,7 +25,7 @@ endianness_from_numpy_str, endianness_to_numpy_str, ) -from zarr.core.dtype.wrapper import TBaseDType, ZDType +from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.dtype.npy.common import EndiannessNumpy @@ -36,11 +37,11 @@ class BaseComplex(ZDType[TComplexDType_co, TComplexScalar_co], HasEndianness, Ha _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> TComplexDType_co: + def to_native_dtype(self) -> TComplexDType_co: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] @@ -59,37 +60,39 @@ def to_json(self, zarr_format: ZarrFormat) -> str: The JSON-serializable representation of the wrapped data type """ if zarr_format == 2: - return self.to_dtype().str + return self.to_native_dtype().str elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return 
cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of this data type. """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + return data in cls._zarr_v2_names + + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[str]: + return data == cls._zarr_v3_name - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: return isinstance(data, ComplexLike) - def _cast_value_unsafe(self, data: object) -> TComplexScalar_co: - return self.to_dtype().type(data) # type: ignore[arg-type, return-value] + def _cast_scalar_unchecked(self, data: object) -> TComplexScalar_co: + return self.to_native_dtype().type(data) # type: ignore[arg-type, return-value] - def default_value(self) -> TComplexScalar_co: + def default_scalar(self) -> TComplexScalar_co: """ Get the default value, which is 0 cast to this dtype @@ -98,9 +101,9 @@ def default_value(self) -> TComplexScalar_co: Int scalar The default value. """ - return self._cast_value_unsafe(0) + return self._cast_scalar_unchecked(0) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TComplexScalar_co: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> TComplexScalar_co: """ Read a JSON-serializable value as a numpy float. 
@@ -118,19 +121,19 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TComplexSca """ if zarr_format == 2: if check_json_complex_float_v2(data): - return self._cast_value_unsafe(complex_float_from_json_v2(data)) + return self._cast_scalar_unchecked(complex_float_from_json_v2(data)) raise TypeError( f"Invalid type: {data}. Expected a float or a special string encoding of a float." ) elif zarr_format == 3: if check_json_complex_float_v3(data): - return self._cast_value_unsafe(complex_float_from_json_v3(data)) + return self._cast_scalar_unchecked(complex_float_from_json_v3(data)) raise TypeError( f"Invalid type: {data}. Expected a float or a special string encoding of a float." ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> JSON: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> JSON: """ Convert an object to a JSON-serializable float. @@ -148,16 +151,16 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> JSON: each of which is encoding according to a zarr-format-specific encoding. 
""" if zarr_format == 2: - return complex_float_to_json_v2(self.cast_value(data)) + return complex_float_to_json_v2(self.cast_scalar(data)) elif zarr_format == 3: - return complex_float_to_json_v3(self.cast_value(data)) + return complex_float_to_json_v3(self.cast_scalar(data)) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @dataclass(frozen=True, kw_only=True) class Complex64(BaseComplex[np.dtypes.Complex64DType, np.complex64]): dtype_cls = np.dtypes.Complex64DType - _zarr_v3_name = "complex64" + _zarr_v3_name: ClassVar[Literal["complex64"]] = "complex64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c8", " int: @dataclass(frozen=True, kw_only=True) class Complex128(BaseComplex[np.dtypes.Complex128DType, np.complex128], HasEndianness): dtype_cls = np.dtypes.Complex128DType - _zarr_v3_name = "complex128" + _zarr_v3_name: ClassVar[Literal["complex128"]] = "complex128" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">c16", " Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> TFloatDType_co: + def to_native_dtype(self) -> TFloatDType_co: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] @@ -51,37 +51,39 @@ def to_json(self, zarr_format: ZarrFormat) -> str: The JSON-serializable representation of the wrapped data type """ if zarr_format == 2: - return self.to_dtype().str + return self.to_native_dtype().str elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # 
type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of this data type. """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + return data in cls._zarr_v2_names + + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[str]: + return data == cls._zarr_v3_name - def check_value(self, data: object) -> TypeGuard[FloatLike]: + def check_scalar(self, data: object) -> TypeGuard[FloatLike]: return isinstance(data, FloatLike) - def _cast_value_unsafe(self, data: object) -> TFloatScalar_co: - return self.to_dtype().type(data) # type: ignore[return-value, arg-type] + def _cast_scalar_unchecked(self, data: object) -> TFloatScalar_co: + return self.to_native_dtype().type(data) # type: ignore[return-value, arg-type] - def default_value(self) -> TFloatScalar_co: + def default_scalar(self) -> TFloatScalar_co: """ Get the default value, which is 0 cast to this dtype @@ -90,9 +92,9 @@ def default_value(self) -> TFloatScalar_co: Int scalar The default value. """ - return self._cast_value_unsafe(0) + return self._cast_scalar_unchecked(0) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TFloatScalar_co: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> TFloatScalar_co: """ Read a JSON-serializable value as a numpy float. 
@@ -110,14 +112,14 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TFloatScala """ if zarr_format == 2: if check_json_float_v2(data): - return self._cast_value_unsafe(float_from_json_v2(data)) + return self._cast_scalar_unchecked(float_from_json_v2(data)) else: raise TypeError( f"Invalid type: {data}. Expected a float or a special string encoding of a float." ) elif zarr_format == 3: if check_json_float_v3(data): - return self._cast_value_unsafe(float_from_json_v3(data)) + return self._cast_scalar_unchecked(float_from_json_v3(data)) else: raise TypeError( f"Invalid type: {data}. Expected a float or a special string encoding of a float." @@ -125,7 +127,7 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TFloatScala else: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> float | str: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> float | str: """ Convert an object to a JSON-serializable float. @@ -143,9 +145,9 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> float | str See the zarr specifications for details on the JSON encoding for floats. 
""" if zarr_format == 2: - return float_to_json_v2(self._cast_value_unsafe(data)) + return float_to_json_v2(self._cast_scalar_unchecked(data)) elif zarr_format == 3: - return float_to_json_v3(self._cast_value_unsafe(data)) + return float_to_json_v3(self._cast_scalar_unchecked(data)) else: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index db5869b202..92705917f9 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -1,5 +1,15 @@ from dataclasses import dataclass -from typing import ClassVar, Self, SupportsIndex, SupportsInt, TypeGuard, TypeVar, cast +from typing import ( + ClassVar, + Literal, + Self, + SupportsIndex, + SupportsInt, + TypeGuard, + TypeVar, + cast, + overload, +) import numpy as np @@ -11,7 +21,7 @@ endianness_from_numpy_str, endianness_to_numpy_str, ) -from zarr.core.dtype.wrapper import TBaseDType, ZDType +from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType _NumpyIntDType = ( np.dtypes.Int8DType @@ -36,44 +46,24 @@ class BaseInt(ZDType[TIntDType_co, TIntScalar_co], HasItemSize): # This attribute holds the possible zarr v2 JSON names for the data type _zarr_v2_names: ClassVar[tuple[str, ...]] - def to_json(self, zarr_format: ZarrFormat) -> str: - """ - Convert the wrapped data type to a JSON-serializable form. - - Parameters - ---------- - zarr_format : ZarrFormat - The zarr format version. 
- - Returns - ------- - str - The JSON-serializable representation of the wrapped data type - """ - if zarr_format == 2: - return self.to_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of this data type. """ - if zarr_format == 2: - return data in cls._zarr_v2_names - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + return data in cls._zarr_v2_names - def check_value(self, data: object) -> TypeGuard[IntLike]: + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[str]: + return data == cls._zarr_v3_name + + def check_scalar(self, data: object) -> TypeGuard[IntLike]: return isinstance(data, IntLike) - def _cast_value_unsafe(self, data: object) -> TIntScalar_co: - return self.to_dtype().type(data) # type: ignore[return-value, arg-type] + def _cast_scalar_unchecked(self, data: object) -> TIntScalar_co: + return self.to_native_dtype().type(data) # type: ignore[return-value, arg-type] - def default_value(self) -> TIntScalar_co: + def default_scalar(self) -> TIntScalar_co: """ Get the default value, which is 0 cast to this dtype @@ -82,9 +72,9 @@ def default_value(self) -> TIntScalar_co: Int scalar The default value. """ - return self._cast_value_unsafe(0) + return self._cast_scalar_unchecked(0) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TIntScalar_co: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> TIntScalar_co: """ Read a JSON-serializable value as a numpy int scalar. 
@@ -101,10 +91,10 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> TIntScalar_ The numpy scalar. """ if check_json_int(data): - return self._cast_value_unsafe(data) + return self._cast_scalar_unchecked(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> int: """ Convert an object to JSON-serializable scalar. @@ -120,24 +110,52 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: int The JSON-serializable form of the scalar. """ - return int(self.cast_value(data)) + return int(self.cast_scalar(data)) @dataclass(frozen=True, kw_only=True) class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): dtype_cls = np.dtypes.Int8DType - _zarr_v3_name = "int8" + _zarr_v3_name: ClassVar[Literal["int8"]] = "int8" _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|i1",) + @overload + def to_json(self, zarr_format: Literal[2]) -> Literal["|i1"]: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> Literal["int8"]: ... + + def to_json(self, zarr_format: ZarrFormat) -> Literal["int8", "|i1"]: + """ + Convert the wrapped data type to a JSON-serializable form. + + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. 
+ + Returns + ------- + str + The JSON-serializable representation of the wrapped data type + """ + if zarr_format == 2: + return self.to_native_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() - def to_dtype(self: Self) -> np.dtypes.Int8DType: + def to_native_dtype(self: Self) -> np.dtypes.Int8DType: return self.dtype_cls() @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: return cls() @property @@ -148,18 +166,46 @@ def item_size(self) -> int: @dataclass(frozen=True, kw_only=True) class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): dtype_cls = np.dtypes.UInt8DType - _zarr_v3_name = "uint8" + _zarr_v3_name: ClassVar[Literal["uint8"]] = "uint8" _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|u1",) + @overload + def to_json(self, zarr_format: Literal[2]) -> Literal["|u1"]: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> Literal["uint8"]: ... + + def to_json(self, zarr_format: ZarrFormat) -> Literal["uint8", "|u1"]: + """ + Convert the wrapped data type to a JSON-serializable form. + + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. 
+ + Returns + ------- + str + The JSON-serializable representation of the wrapped data type + """ + if zarr_format == 2: + return self.to_native_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() - def to_dtype(self: Self) -> np.dtypes.UInt8DType: + def to_native_dtype(self: Self) -> np.dtypes.UInt8DType: return self.dtype_cls() @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: return cls() @property @@ -170,23 +216,51 @@ def item_size(self) -> int: @dataclass(frozen=True, kw_only=True) class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): dtype_cls = np.dtypes.Int16DType - _zarr_v3_name = "int16" + _zarr_v3_name: ClassVar[Literal["int16"]] = "int16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i2", " Literal[">i2", " Literal["int16"]: ... 
+ + def to_json(self, zarr_format: ZarrFormat) -> Literal["int16", ">i2", " Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> np.dtypes.Int16DType: + def to_native_dtype(self) -> np.dtypes.Int16DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: # This ensures that we get the endianness correct without annoying string parsing - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -199,22 +273,50 @@ def item_size(self) -> int: @dataclass(frozen=True, kw_only=True) class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): dtype_cls = np.dtypes.UInt16DType - _zarr_v3_name = "uint16" + _zarr_v3_name: ClassVar[Literal["uint16"]] = "uint16" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u2", " Literal[">u2", " Literal["uint16"]: ... 
+ + def to_json(self, zarr_format: ZarrFormat) -> Literal["uint16", ">u2", " Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> np.dtypes.UInt16DType: + def to_native_dtype(self) -> np.dtypes.UInt16DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -227,34 +329,64 @@ def item_size(self) -> int: @dataclass(frozen=True, kw_only=True) class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): dtype_cls = np.dtypes.Int32DType - _zarr_v3_name = "int32" + _zarr_v3_name: ClassVar[Literal["int32"]] = "int32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", " Literal[">i4", " Literal["int32"]: ... + + def to_json(self, zarr_format: ZarrFormat) -> Literal["int32", ">i4", " Self: + def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: # We override the base implementation to address a windows-specific, pre-numpy 2 issue where # ``np.dtype('i')`` is an instance of ``np.dtypes.IntDType`` that acts like `int32` instead of ``np.dtype('int32')`` # In this case, ``type(np.dtype('i')) == np.dtypes.Int32DType`` will evaluate to ``True``, # despite the two classes being different. 
Thus we will create an instance of `cls` with the # latter dtype, after pulling in the byte order of the input if dtype == np.dtypes.Int32DType(): - return cls._from_dtype_unsafe(np.dtypes.Int32DType().newbyteorder(dtype.byteorder)) + return cls._from_native_dtype_unsafe( + np.dtypes.Int32DType().newbyteorder(dtype.byteorder) + ) else: - return super().from_dtype(dtype) + return super().from_native_dtype(dtype) @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> np.dtypes.Int32DType: + def to_native_dtype(self) -> np.dtypes.Int32DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -267,22 +399,48 @@ def item_size(self) -> int: @dataclass(frozen=True, kw_only=True) class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): dtype_cls = np.dtypes.UInt32DType - _zarr_v3_name = "uint32" + _zarr_v3_name: ClassVar[Literal["uint32"]] = "uint32" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u4", " Literal[">u4", " Literal["uint32"]: ... 
+ def to_json(self, zarr_format: ZarrFormat) -> Literal["uint32", ">u4", " Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> np.dtypes.UInt32DType: + def to_native_dtype(self) -> np.dtypes.UInt32DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -295,22 +453,48 @@ def item_size(self) -> int: @dataclass(frozen=True, kw_only=True) class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): dtype_cls = np.dtypes.Int64DType - _zarr_v3_name = "int64" + _zarr_v3_name: ClassVar[Literal["int64"]] = "int64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i8", " Literal[">i8", " Literal["int64"]: ... 
+ def to_json(self, zarr_format: ZarrFormat) -> Literal["int64", ">i8", " Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> np.dtypes.Int64DType: + def to_native_dtype(self) -> np.dtypes.Int64DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -323,22 +507,50 @@ def item_size(self) -> int: @dataclass(frozen=True, kw_only=True) class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): dtype_cls = np.dtypes.UInt64DType - _zarr_v3_name = "uint64" + _zarr_v3_name: ClassVar[Literal["uint64"]] = "uint64" _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u8", " Literal[">u8", " Literal["uint64"]: ... 
+ + def to_json(self, zarr_format: ZarrFormat) -> Literal["uint64", ">u8", " Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) - def to_dtype(self) -> np.dtypes.UInt64DType: + def to_native_dtype(self) -> np.dtypes.UInt64DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: return cls() raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/sized.py index eb2b39ad9a..69d6145ad4 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/sized.py @@ -2,11 +2,11 @@ import re from collections.abc import Sequence from dataclasses import dataclass -from typing import Any, Self, TypeGuard, cast +from typing import Any, ClassVar, Literal, Self, TypedDict, TypeGuard, cast, overload import numpy as np -from zarr.core.common import JSON, ZarrFormat +from zarr.core.common import JSON, NamedConfig, ZarrFormat from zarr.core.dtype.common import ( DataTypeValidationError, HasItemSize, @@ -18,7 +18,14 @@ bytes_to_json, check_json_str, ) -from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType +from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, TBaseScalar, ZDType + + +class FixedLengthBytesConfig(TypedDict): + length_bytes: int + + +FixedLengthBytesJSONV3 = NamedConfig[Literal["fixed_length_bytes"], FixedLengthBytesConfig] @dataclass(frozen=True, kw_only=True) @@ 
-27,49 +34,59 @@ class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, Has # it cannot be used to create instances of the dtype # so we have to tell mypy to ignore this here dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] - _zarr_v3_name = "numpy.fixed_length_bytes" + _zarr_v3_name: ClassVar[Literal["fixed_length_bytes"]] = "fixed_length_bytes" @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls(length=dtype.itemsize) - def to_dtype(self) -> np.dtypes.VoidDType[int]: + def to_native_dtype(self) -> np.dtypes.VoidDType[int]: # Numpy does not allow creating a void type # by invoking np.dtypes.VoidDType directly return cast("np.dtypes.VoidDType[int]", np.dtype(f"V{self.length}")) @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - if zarr_format == 2: - # Check that the dtype is |V1, |V2, ... - return isinstance(data, str) and re.match(r"^\|V\d+$", data) is not None - elif zarr_format == 3: - return ( - isinstance(data, dict) - and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name - and isinstance(data["configuration"], dict) - and set(data["configuration"].keys()) == {"length_bytes"} - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + # Check that the dtype is |V1, |V2, ... 
+ return isinstance(data, str) and re.match(r"^\|V\d+$", data) is not None - def to_json(self, zarr_format: ZarrFormat) -> JSON: + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[FixedLengthBytesJSONV3]: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) == {"length_bytes"} + ) + + @overload + def to_json(self, zarr_format: Literal[2]) -> str: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> FixedLengthBytesJSONV3: ... + + def to_json(self, zarr_format: ZarrFormat) -> str | FixedLengthBytesJSONV3: if zarr_format == 2: - return self.to_dtype().str + return self.to_native_dtype().str elif zarr_format == 3: return {"name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}} raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"]) # type: ignore[arg-type, index, call-overload] + return cls(length=data["configuration"]["length_bytes"]) # type: ignore[index, call-overload] raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def check_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[Any]]: + def check_native_dtype( + cls: type[Self], dtype: TBaseDType + ) -> TypeGuard[np.dtypes.VoidDType[Any]]: """ Numpy void dtype comes in two forms: * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. 
@@ -89,22 +106,22 @@ def check_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidD """ return cls.dtype_cls is type(dtype) and dtype.fields is None # type: ignore[has-type] - def default_value(self) -> np.void: - return self.to_dtype().type(("\x00" * self.length).encode("ascii")) + def default_scalar(self) -> np.void: + return self.to_native_dtype().type(("\x00" * self.length).encode("ascii")) - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(self.cast_value(data).tobytes()).decode("ascii") + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(self.cast_scalar(data).tobytes()).decode("ascii") - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: if check_json_str(data): - return self.to_dtype().type(base64.standard_b64decode(data)) + return self.to_native_dtype().type(base64.standard_b64decode(data)) raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: return isinstance(data, np.bytes_ | str | bytes | np.void) - def _cast_value_unsafe(self, data: object) -> np.void: - native_dtype = self.to_dtype() + def _cast_scalar_unchecked(self, data: object) -> np.void: + native_dtype = self.to_native_dtype() # Without the second argument, numpy will return a void scalar for dtype V1. # The second argument ensures that, if native_dtype is something like V10, # the result will actually be a V10 scalar. @@ -115,17 +132,18 @@ def item_size(self) -> int: return self.length +# TODO: tighten this up, get a v3 spec in place, handle endianness, etc. 
@dataclass(frozen=True, kw_only=True) class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] _zarr_v3_name = "structured" fields: tuple[tuple[str, ZDType[TBaseDType, TBaseScalar]], ...] - def default_value(self) -> np.void: - return self._cast_value_unsafe(0) + def default_scalar(self) -> np.void: + return self._cast_scalar_unchecked(0) - def _cast_value_unsafe(self, data: object) -> np.void: - na_dtype = self.to_dtype() + def _cast_scalar_unchecked(self, data: object) -> np.void: + na_dtype = self.to_native_dtype() if isinstance(data, bytes): res = np.frombuffer(data, dtype=na_dtype)[0] elif isinstance(data, list | tuple): @@ -135,7 +153,7 @@ def _cast_value_unsafe(self, data: object) -> np.void: return cast("np.void", res) @classmethod - def check_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: + def check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: """ Check that this dtype is a numpy structured dtype @@ -149,10 +167,10 @@ def check_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: TypeGuard[np.dtypes.VoidDType] True if the dtype matches, False otherwise. """ - return super().check_dtype(dtype) and dtype.fields is not None + return super().check_native_dtype(dtype) and dtype.fields is not None @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: from zarr.core.dtype import get_data_type_from_native_dtype fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] @@ -168,7 +186,13 @@ def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls(fields=tuple(fields)) - def to_json(self, zarr_format: ZarrFormat) -> JSON: + @overload + def to_json(self, zarr_format: Literal[2]) -> DTypeJSON_V2: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> DTypeJSON_V3: ... 
+ + def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V3 | DTypeJSON_V2: fields = [ (f_name, f_dtype.to_json(zarr_format=zarr_format)) for f_name, f_dtype in self.fields ] @@ -178,90 +202,94 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: v3_unstable_dtype_warning(self) base_dict = {"name": self._zarr_v3_name} base_dict["configuration"] = {"fields": fields} # type: ignore[assignment] - return cast("JSON", base_dict) + return cast("DTypeJSON_V3", base_dict) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def check_json( - cls, data: JSON, zarr_format: ZarrFormat - ) -> TypeGuard[dict[str, JSON] | list[Any]]: + def check_json_v2( + cls, data: JSON, *, object_codec_id: str | None = None + ) -> TypeGuard[list[object]]: # the actual JSON form is recursive and hard to annotate, so we give up and do - # list[Any] for now - if zarr_format == 2: - return ( - not isinstance(data, str) - and isinstance(data, Sequence) - and all( - not isinstance(field, str) and isinstance(field, Sequence) and len(field) == 2 - for field in data - ) - ) - elif zarr_format == 3: - return ( - isinstance(data, dict) - and "name" in data - and "configuration" in data - and isinstance(data["configuration"], dict) - and "fields" in data["configuration"] + # list[object] for now + + return ( + not isinstance(data, str) + and isinstance(data, Sequence) + and all( + not isinstance(field, str) and isinstance(field, Sequence) and len(field) == 2 + for field in data ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + ) + + @classmethod + def check_json_v3( + cls, data: JSON + ) -> TypeGuard[NamedConfig[Literal["structured"], dict[str, Sequence[tuple[str, JSON]]]]]: + return ( + isinstance(data, dict) + and "name" in data + and data["name"] == cls._zarr_v3_name + and "configuration" in data + and isinstance(data["configuration"], dict) + and "fields" in data["configuration"] + ) @classmethod - def 
_from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: - from zarr.core.dtype import get_data_type_from_json + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: + # avoid circular import issues by importing these functions here + from zarr.core.dtype import get_data_type_from_json_v2, get_data_type_from_json_v3 # This is a horrible mess, because this data type is recursive - if cls.check_json(data, zarr_format=zarr_format): - if zarr_format == 2: + if zarr_format == 2: + if cls.check_json_v2(data): # type: ignore[arg-type] # structured dtypes are constructed directly from a list of lists + # note that we do not handle the object codec here! this will prevent structured + # dtypes from containing object dtypes. return cls( fields=tuple( # type: ignore[misc] - (f_name, get_data_type_from_json(f_dtype, zarr_format=zarr_format)) + (f_name, get_data_type_from_json_v2(f_dtype, object_codec_id=None)) # type: ignore[has-type] for f_name, f_dtype in data ) ) - elif zarr_format == 3: - if isinstance(data, dict) and "configuration" in data: - config = data["configuration"] - if isinstance(config, dict) and "fields" in config: - meta_fields = config["fields"] - fields = tuple( - (f_name, get_data_type_from_json(f_dtype, zarr_format=zarr_format)) - for f_name, f_dtype in meta_fields - ) - return cls(fields=fields) - else: - raise TypeError( - f"Invalid type: {data}. Expected a dictionary." - ) # pragma: no cover - else: - raise TypeError( - f"Invalid type: {data}. Expected a dictionary." 
- ) # pragma: no cover + else: + raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") + elif zarr_format == 3: + if cls.check_json_v3(data): # type: ignore[arg-type] + config = data["configuration"] + meta_fields = config["fields"] + fields = tuple( + (f_name, get_data_type_from_json_v3(f_dtype)) for f_name, f_dtype in meta_fields + ) + else: + raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") + else: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") - def to_dtype(self) -> np.dtypes.VoidDType[int]: + return cls(fields=fields) + + def to_native_dtype(self) -> np.dtypes.VoidDType[int]: return cast( "np.dtypes.VoidDType[int]", - np.dtype([(key, dtype.to_dtype()) for (key, dtype) in self.fields]), + np.dtype([(key, dtype.to_native_dtype()) for (key, dtype) in self.fields]), ) - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: - return bytes_to_json(self.cast_value(data).tobytes(), zarr_format) + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + return bytes_to_json(self.cast_scalar(data).tobytes(), zarr_format) - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: # TODO: implement something here! return True - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: if check_json_str(data): as_bytes = bytes_from_json(data, zarr_format=zarr_format) - dtype = self.to_dtype() + dtype = self.to_native_dtype() return cast("np.void", np.array([as_bytes]).view(dtype)[0]) raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover @property def item_size(self) -> int: # Lets have numpy do the arithmetic here - return self.to_dtype().itemsize + return self.to_native_dtype().itemsize diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index b5b86ca387..2299b7aab1 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -3,18 +3,19 @@ import base64 import re from dataclasses import dataclass -from typing import TYPE_CHECKING, ClassVar, Self, TypeGuard, cast +from typing import TYPE_CHECKING, ClassVar, Literal, Self, TypedDict, TypeGuard, cast, overload import numpy as np -from zarr.core.dtype.common import HasEndianness, HasItemSize, HasLength +from zarr.core.common import NamedConfig +from zarr.core.dtype.common import HasEndianness, HasItemSize, HasLength, HasObjectCodec from zarr.core.dtype.npy.common import ( EndiannessNumpy, check_json_str, endianness_from_numpy_str, endianness_to_numpy_str, ) -from zarr.core.dtype.wrapper import ZDType +from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @@ -23,39 +24,53 @@ _NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") +class LengthBytesConfig(TypedDict): + length_bytes: int + + +# TDO: Fix this terrible name +FixedLengthASCIIJSONV3 = NamedConfig[Literal["fixed_length_ascii"], LengthBytesConfig] + + @dataclass(frozen=True, kw_only=True) class FixedLengthASCII(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): dtype_cls = np.dtypes.BytesDType - _zarr_v3_name = "numpy.fixed_length_ascii" + _zarr_v3_name: ClassVar[Literal["fixed_length_ascii"]] = "fixed_length_ascii" @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls(length=dtype.itemsize) - def to_dtype(self) -> np.dtypes.BytesDType[int]: + def to_native_dtype(self) -> 
np.dtypes.BytesDType[int]: return self.dtype_cls(self.length) @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of a numpy S dtype. """ - if zarr_format == 2: - # match |S1, |S2, etc - return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None - elif zarr_format == 3: - return ( - isinstance(data, dict) - and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name - and isinstance(data["configuration"], dict) - and "length_bytes" in data["configuration"] - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + # match |S1, |S2, etc + return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None + + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[FixedLengthASCIIJSONV3]: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and isinstance(data["configuration"], dict) + and "length_bytes" in data["configuration"] + ) + + @overload + def to_json(self, zarr_format: Literal[2]) -> str: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> FixedLengthASCIIJSONV3: ... 
- def to_json(self, zarr_format: ZarrFormat) -> JSON: + def to_json(self, zarr_format: ZarrFormat) -> str | FixedLengthASCIIJSONV3: if zarr_format == 2: - return self.to_dtype().str + return self.to_native_dtype().str elif zarr_format == 3: return { "name": self._zarr_v3_name, @@ -64,29 +79,31 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"]) # type: ignore[arg-type, index, call-overload] + return cls(length=data["configuration"]["length_bytes"]) # type: ignore[index, call-overload] raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.bytes_: + def default_scalar(self) -> np.bytes_: return np.bytes_(b"") - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: if check_json_str(data): - return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii"))) + return self.to_native_dtype().type(base64.standard_b64decode(data.encode("ascii"))) raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: # this is generous for backwards compatibility return isinstance(data, np.bytes_ | str | bytes | int) - def _cast_value_unsafe(self, data: object) -> np.bytes_: + def _cast_scalar_unchecked(self, data: object) -> np.bytes_: # We explicitly truncate the result because of the following numpy behavior: # >>> x = np.dtype('S3').type('hello world') # >>> x @@ -94,56 +111,68 @@ def _cast_value_unsafe(self, data: object) -> np.bytes_: # >>> x.dtype # dtype('S11') - return self.to_dtype().type(data[: self.length]) # type: ignore[index] + if isinstance(data, int): + return self.to_native_dtype().type(str(data)[: self.length]) + else: + return self.to_native_dtype().type(data[: self.length]) # type: ignore[index] @property def item_size(self) -> int: return self.length +# TODO: Fix this terrible name +FixedLengthUTF32JSONV3 = NamedConfig[Literal["fixed_length_utf32"], LengthBytesConfig] + + @dataclass(frozen=True, kw_only=True) class FixedLengthUTF32( ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength, HasItemSize ): dtype_cls = np.dtypes.StrDType - _zarr_v3_name = "numpy.fixed_length_utf32" + _zarr_v3_name: ClassVar[Literal["fixed_length_utf32"]] = "fixed_length_utf32" code_point_bytes: ClassVar[int] = 4 # utf32 is 4 bytes per code point @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls( length=dtype.itemsize // (cls.code_point_bytes), endianness=endianness_from_numpy_str(byte_order), ) - def to_dtype(self) -> np.dtypes.StrDType[int]: + def to_native_dtype(self) -> np.dtypes.StrDType[int]: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls(self.length).newbyteorder(byte_order) @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> 
TypeGuard[JSON]: + def check_json_v2(cls, data: JSON, object_codec_id: str | None = None) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of a numpy S dtype. """ - if zarr_format == 2: - # match >U1, <]U\d+$", data) is not None - elif zarr_format == 3: - return ( - isinstance(data, dict) - and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name - and "configuration" in data - and isinstance(data["configuration"], dict) - and set(data["configuration"].keys()) == {"length_bytes"} - and isinstance(data["configuration"]["length_bytes"], int) - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + return isinstance(data, str) and re.match(r"^[><]U\d+$", data) is not None + + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[FixedLengthUTF32JSONV3]: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and "configuration" in data + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) == {"length_bytes"} + and isinstance(data["configuration"]["length_bytes"], int) + ) + + @overload + def to_json(self, zarr_format: Literal[2]) -> str: ... - def to_json(self, zarr_format: ZarrFormat) -> JSON: + @overload + def to_json(self, zarr_format: Literal[3]) -> FixedLengthUTF32JSONV3: ... 
+ + def to_json(self, zarr_format: ZarrFormat) -> str | FixedLengthUTF32JSONV3: if zarr_format == 2: - return self.to_dtype().str + return self.to_native_dtype().str elif zarr_format == 3: return { "name": self._zarr_v3_name, @@ -152,29 +181,31 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) # type: ignore[arg-type, index, call-overload, operator] + return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) # type: ignore[index, call-overload] raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def default_value(self) -> np.str_: + def default_scalar(self) -> np.str_: return np.str_("") - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: return str(data) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: if check_json_str(data): - return self.to_dtype().type(data) + return self.to_native_dtype().type(data) raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: # this is generous for backwards compatibility return isinstance(data, str | np.str_ | bytes | int) - def _cast_value_unsafe(self, data: object) -> np.str_: + def _cast_scalar_unchecked(self, data: object) -> np.str_: # We explicitly truncate the result because of the following numpy behavior: # >>> x = np.dtype('U3').type('hello world') # >>> x @@ -182,7 +213,10 @@ def _cast_value_unsafe(self, data: object) -> np.str_: # >>> x.dtype # dtype('U11') - return self.to_dtype().type(data[: self.length]) # type: ignore[index] + if isinstance(data, int): + return self.to_native_dtype().type(str(data)[: self.length]) + else: + return self.to_native_dtype().type(data[: self.length]) # type: ignore[index] @property def item_size(self) -> int: @@ -192,32 +226,38 @@ def item_size(self) -> int: if _NUMPY_SUPPORTS_VLEN_STRING: @dataclass(frozen=True, kw_only=True) - class VariableLengthString(ZDType[np.dtypes.StringDType, str]): # type: ignore[type-var] + class VariableLengthString(ZDType[np.dtypes.StringDType, str], HasObjectCodec): # type: ignore[type-var] dtype_cls = np.dtypes.StringDType - _zarr_v3_name = "numpy.variable_length_utf8" + _zarr_v3_name: ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" + object_codec_id = "vlen-utf8" @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() - def to_dtype(self) -> np.dtypes.StringDType: + def to_native_dtype(self) -> np.dtypes.StringDType: return self.dtype_cls() @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + def check_json_v2( + cls, data: JSON, *, object_codec_id: str | None = None + ) -> TypeGuard[Literal["|O"]]: """ - Check that the input is a valid JSON representation of a numpy string dtype. 
+ Check that the input is a valid JSON representation of a numpy O dtype, and that the + object codec id is appropriate for variable-length UTF-8 strings. """ - if zarr_format == 2: - # TODO: take the entire metadata document in here, and - # check the compressors / filters for vlen-utf8 - # Note that we are checking for the object dtype name. - return data == "|O" - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + return data == "|O" and object_codec_id == cls.object_codec_id + + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: + return data == cls._zarr_v3_name - def to_json(self, zarr_format: ZarrFormat) -> JSON: + @overload + def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... + @overload + def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_utf8"]: ... + + def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf8"]: if zarr_format == 2: # Note: unlike many other numpy data types, we don't serialize the .str attribute # of the data type to JSON. 
This is because Zarr was using `|O` for strings before the @@ -229,71 +269,83 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: return cls() - def default_value(self) -> str: + def default_scalar(self) -> str: return "" - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: return str(data) - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: if not check_json_str(data): raise TypeError(f"Invalid type: {data}. Expected a string.") return data - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: return isinstance(data, str) - def _cast_value_unsafe(self, data: object) -> str: + def _cast_scalar_unchecked(self, data: object) -> str: return str(data) else: # Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead. 
@dataclass(frozen=True, kw_only=True) - class VariableLengthString(ZDType[np.dtypes.ObjectDType, str]): # type: ignore[no-redef] + class VariableLengthString(ZDType[np.dtypes.ObjectDType, str], HasObjectCodec): # type: ignore[no-redef] dtype_cls = np.dtypes.ObjectDType - _zarr_v3_name = "numpy.variable_length_utf8" + _zarr_v3_name: ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" + object_codec_id = "vlen-utf8" @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: return cls() - def to_dtype(self) -> np.dtypes.ObjectDType: + def to_native_dtype(self) -> np.dtypes.ObjectDType: return self.dtype_cls() @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + def check_json_v2( + cls, data: JSON, *, object_codec_id: str | None = None + ) -> TypeGuard[Literal["|O"]]: """ - Check that the input is a valid JSON representation of a numpy O dtype. + Check that the input is a valid JSON representation of a numpy O dtype, and that the + object codec id is appropriate for variable-length UTF-8 strings. """ - if zarr_format == 2: - # TODO: take the entire metadata document in here, and - # check the compressors / filters for vlen-utf8 - return data == "|O" - elif zarr_format == 3: - return data == cls._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + return data == "|O" and object_codec_id == cls.object_codec_id - def to_json(self, zarr_format: ZarrFormat) -> JSON: + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: + return data == cls._zarr_v3_name + + @overload + def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_utf8"]: ... 
+ + def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf8"]: if zarr_format == 2: - return self.to_dtype().str + return "|O" elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: return cls() - def default_value(self) -> str: + def default_scalar(self) -> str: return "" - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: return data # type: ignore[return-value] - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: """ Strings pass through """ @@ -301,8 +353,8 @@ def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> str: raise TypeError(f"Invalid type: {data}. 
Expected a string.") return data - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: return isinstance(data, str) - def _cast_value_unsafe(self, data: object) -> str: + def _cast_scalar_unchecked(self, data: object) -> str: return str(data) diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index 1c0e0d715c..4c5ce45442 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -1,11 +1,9 @@ from __future__ import annotations -from collections.abc import Mapping from dataclasses import dataclass from typing import ( TYPE_CHECKING, ClassVar, - Generic, Literal, Self, TypedDict, @@ -13,10 +11,12 @@ TypeVar, cast, get_args, + overload, ) import numpy as np +from zarr.core.common import NamedConfig from zarr.core.dtype.common import HasEndianness, HasItemSize from zarr.core.dtype.npy.common import ( DateTimeUnit, @@ -25,7 +25,7 @@ endianness_from_numpy_str, endianness_to_numpy_str, ) -from zarr.core.dtype.wrapper import TBaseDType, ZDType +from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @@ -79,23 +79,14 @@ def datetimelike_to_int(data: np.datetime64 | np.timedelta64) -> int: ) _BaseTimeScalar = TypeVar("_BaseTimeScalar", bound=np.timedelta64 | np.datetime64) -TName = TypeVar("TName", bound=str) -TConfig = TypeVar("TConfig", bound=Mapping[str, object]) - - -class NamedConfig(TypedDict, Generic[TName, TConfig]): - name: TName - configuration: TConfig - class TimeConfig(TypedDict): unit: DateTimeUnit interval: int -# aspirational -DateTime64MetaParams = NamedConfig[Literal["numpy.datetime64"], TimeConfig] -TimeDelta64MetaParams = NamedConfig[Literal["numpy.timedelta64"], TimeConfig] +DateTime64JSONV3 = NamedConfig[Literal["numpy.datetime64"], TimeConfig] +TimeDelta64JSONV3 = NamedConfig[Literal["numpy.timedelta64"], TimeConfig] @dataclass(frozen=True, kw_only=True, slots=True) @@ 
-117,7 +108,7 @@ def __post_init__(self) -> None: raise ValueError(f"unit must be one of {get_args(DateTimeUnit)}, got {self.unit!r}.") @classmethod - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: unit, scale_factor = np.datetime_data(dtype.name) unit = cast("DateTimeUnit", unit) byteorder = cast("EndiannessNumpy", dtype.byteorder) @@ -125,7 +116,7 @@ def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: unit=unit, scale_factor=scale_factor, endianness=endianness_from_numpy_str(byteorder) ) - def to_dtype(self) -> _BaseTimeDType_co: + def to_native_dtype(self) -> _BaseTimeDType_co: # Numpy does not allow creating datetime64 or timedelta64 via # np.dtypes.{dtype_name}() # so we use np.dtype with a formatted string. @@ -133,32 +124,42 @@ def to_dtype(self) -> _BaseTimeDType_co: return np.dtype(dtype_string).newbyteorder(endianness_to_numpy_str(self.endianness)) # type: ignore[return-value] @classmethod - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: if zarr_format == 2: - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] elif zarr_format == 3: unit = data["configuration"]["unit"] # type: ignore[index, call-overload] scale_factor = data["configuration"]["scale_factor"] # type: ignore[index, call-overload] - return cls(unit=unit, scale_factor=scale_factor) # type: ignore[arg-type] + return cls(unit=unit, scale_factor=scale_factor) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_json(self, zarr_format: ZarrFormat) -> JSON: + @overload + def to_json(self, zarr_format: Literal[2]) -> str: ... + @overload + def to_json(self, zarr_format: Literal[3]) -> DateTime64JSONV3 | TimeDelta64JSONV3: ... 
+ + def to_json(self, zarr_format: ZarrFormat) -> str | DateTime64JSONV3 | TimeDelta64JSONV3: if zarr_format == 2: - return cast("str", self.to_dtype().str) + return cast("str", self.to_native_dtype().str) elif zarr_format == 3: - return { - "name": self._zarr_v3_name, - "configuration": {"unit": self.unit, "scale_factor": self.scale_factor}, - } + return cast( + "DateTime64JSONV3 | TimeDelta64JSONV3", + { + "name": self._zarr_v3_name, + "configuration": {"unit": self.unit, "scale_factor": self.scale_factor}, + }, + ) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> int: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> int: return datetimelike_to_int(data) # type: ignore[arg-type] - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: # TODO: decide which values we should accept for datetimes. try: - np.array([data], dtype=self.to_dtype()) + np.array([data], dtype=self.to_native_dtype()) return True # noqa: TRY300 except ValueError: return False @@ -178,91 +179,90 @@ class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], Has """ dtype_cls = np.dtypes.TimeDelta64DType # type: ignore[assignment] - _zarr_v3_name = "numpy.timedelta64" + _zarr_v3_name: ClassVar[Literal["numpy.timedelta64"]] = "numpy.timedelta64" _zarr_v2_names = (">m8", " np.timedelta64: + def default_scalar(self) -> np.timedelta64: return np.timedelta64("NaT") - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: if check_json_int(data) or data == "NaT": - return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] + return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] raise TypeError(f"Invalid type: {data}. 
Expected an integer.") # pragma: no cover - def _cast_value_unsafe(self, data: object) -> np.timedelta64: - return self.to_dtype().type(data) # type: ignore[arg-type] + def _cast_scalar_unchecked(self, data: object) -> np.timedelta64: + return self.to_native_dtype().type(data) # type: ignore[arg-type] @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - if zarr_format == 2: - # match m[M], etc - # consider making this a standalone function - if not isinstance(data, str): - return False - if not data.startswith(cls._zarr_v2_names): - return False - if len(data) == 3: - # no unit, and - # we already checked that this string is either m8 - return True - else: - return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" - elif zarr_format == 3: - return ( - isinstance(data, dict) - and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name - and set(data.keys()) == {"name", "configuration"} - and isinstance(data["configuration"], dict) - and set(data["configuration"].keys()) == {"unit", "scale_factor"} - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + # match m[M], etc + # consider making this a standalone function + if not isinstance(data, str): + return False + if not data.startswith(cls._zarr_v2_names): + return False + if len(data) == 3: + # no unit, and + # we already checked that this string is either m8 + return True + else: + return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" + + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) == {"unit", "scale_factor"} + ) 
@dataclass(frozen=True, kw_only=True, slots=True) class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] - _zarr_v3_name = "numpy.datetime64" + _zarr_v3_name: ClassVar[Literal["numpy.datetime64"]] = "numpy.datetime64" _zarr_v2_names = (">M8", " np.datetime64: + def default_scalar(self) -> np.datetime64: return np.datetime64("NaT") - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: if check_json_int(data) or data == "NaT": - return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] + return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover - def _cast_value_unsafe(self, data: object) -> np.datetime64: - return self.to_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[no-any-return, call-overload] + def _cast_scalar_unchecked(self, data: object) -> np.datetime64: + return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[no-any-return, call-overload] @classmethod - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: - if zarr_format == 2: - # match M[M], etc - # consider making this a standalone function - if not isinstance(data, str): - return False - if not data.startswith(cls._zarr_v2_names): - return False - if len(data) == 3: - # no unit, and - # we already checked that this string is either M8 - return True - else: - return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" - elif zarr_format == 3: - return ( - isinstance(data, dict) - and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name - and set(data["configuration"].keys()) == {"unit", "scale_factor"} - 
and data["configuration"]["unit"] in get_args(DateTimeUnit) - ) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + # match M[M], etc + # consider making this a standalone function + if not isinstance(data, str): + return False + if not data.startswith(cls._zarr_v2_names): + return False + if len(data) == 3: + # no unit, and + # we already checked that this string is either M8 + return True + else: + return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" + + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) == {"unit", "scale_factor"} + ) diff --git a/src/zarr/core/dtype/registry.py b/src/zarr/core/dtype/registry.py index 047f908ac6..0423f69dbe 100644 --- a/src/zarr/core/dtype/registry.py +++ b/src/zarr/core/dtype/registry.py @@ -1,5 +1,6 @@ from __future__ import annotations +import contextlib from dataclasses import dataclass, field from typing import TYPE_CHECKING, Self @@ -10,7 +11,7 @@ if TYPE_CHECKING: from importlib.metadata import EntryPoint - from zarr.core.common import JSON, ZarrFormat + from zarr.core.common import JSON from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @@ -22,11 +23,12 @@ class DataTypeRegistry: contents: dict[str, type[ZDType[TBaseDType, TBaseScalar]]] = field( default_factory=dict, init=False ) + lazy_load_list: list[EntryPoint] = field(default_factory=list, init=False) def lazy_load(self) -> None: for e in self.lazy_load_list: - self.register(e.name, e.load()) + self.register(e.load()._zarr_v3_name, e.load()) self.lazy_load_list.clear() @@ -35,14 +37,20 @@ def register(self: Self, key: str, cls: type[ZDType[TBaseDType, 
TBaseScalar]]) - if key not in self.contents or self.contents[key] != cls: self.contents[key] = cls + def unregister(self, key: str) -> None: + """Unregister a data type by its key.""" + if key in self.contents: + del self.contents[key] + else: + raise KeyError(f"Data type '{key}' not found in registry.") + def get(self, key: str) -> type[ZDType[TBaseDType, TBaseScalar]]: return self.contents[key] def match_dtype(self, dtype: TBaseDType) -> ZDType[TBaseDType, TBaseScalar]: - self.lazy_load() if dtype == np.dtype("O"): msg = ( - "Data type resolution failed. " + f"Zarr data type resolution from {dtype} failed. " 'Attempted to resolve a zarr data type from a numpy "Object" data type, which is ' 'ambiguous, as multiple zarr data types can be represented by the numpy "Object" ' "data type. " @@ -51,18 +59,41 @@ def match_dtype(self, dtype: TBaseDType) -> ZDType[TBaseDType, TBaseScalar]: "data type, see xxxxxxxxxxx" ) raise ValueError(msg) + matched: list[ZDType[TBaseDType, TBaseScalar]] = [] + for val in self.contents.values(): + with contextlib.suppress(DataTypeValidationError): + matched.append(val.from_native_dtype(dtype)) + if len(matched) == 1: + return matched[0] + elif len(matched) > 1: + msg = ( + f"Zarr data type resolution from {dtype} failed. " + f"Multiple data type wrappers found that match dtype '{dtype}': {matched}. " + "You should unregister one of these data types, or avoid Zarr data type inference " + "entirely by providing a specific Zarr data type when creating your array." + "For more information, see xxxxxxxxxxxxxxxxxx" + ) + raise ValueError(msg) + raise ValueError(f"No data type wrapper found that matches dtype '{dtype}'") + + def match_json_v2( + self, data: JSON, *, object_codec_id: str | None = None + ) -> ZDType[TBaseDType, TBaseScalar]: + # The dtype field in zarr v2 JSON metadata is not unique across different distinct data types. + # Specifically, multiple distinct data types all use the "|O" data type representation. 
+ # These must be disambiguated by the presence of an "object codec", which is a codec + # like variable-length utf8 encoding for strings. for val in self.contents.values(): try: - return val.from_dtype(dtype) + return val.from_json_v2(data, object_codec_id=object_codec_id) except DataTypeValidationError: pass - raise ValueError(f"No data type wrapper found that matches dtype '{dtype}'") + raise ValueError(f"No data type wrapper found that matches {data}") - def match_json(self, data: JSON, zarr_format: ZarrFormat) -> ZDType[TBaseDType, TBaseScalar]: - self.lazy_load() + def match_json_v3(self, data: JSON) -> ZDType[TBaseDType, TBaseScalar]: for val in self.contents.values(): try: - return val.from_json(data, zarr_format=zarr_format) + return val.from_json_v3(data) except DataTypeValidationError: pass raise ValueError(f"No data type wrapper found that matches {data}") diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index bd9686afc1..c9b23707e8 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -23,8 +23,18 @@ from __future__ import annotations from abc import ABC, abstractmethod +from collections.abc import Mapping, Sequence from dataclasses import dataclass -from typing import TYPE_CHECKING, ClassVar, Generic, Self, TypeGuard, TypeVar +from typing import ( + TYPE_CHECKING, + ClassVar, + Generic, + Literal, + Self, + TypeGuard, + TypeVar, + overload, +) import numpy as np @@ -46,6 +56,10 @@ TScalar_co = TypeVar("TScalar_co", bound=TBaseScalar, covariant=True) TDType_co = TypeVar("TDType_co", bound=TBaseDType, covariant=True) +# These types should include all JSON-serializable types that can be used to represent a data type. 
+DTypeJSON_V2 = str | Sequence[object] +DTypeJSON_V3 = str | Mapping[str, object] + @dataclass(frozen=True, kw_only=True, slots=True) class ZDType(Generic[TDType_co, TScalar_co], ABC): @@ -70,7 +84,7 @@ class ZDType(Generic[TDType_co, TScalar_co], ABC): _zarr_v3_name: ClassVar[str] @classmethod - def check_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_co]: + def check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_co]: """ Check that a data type matches the dtype_cls class attribute. Used as a type guard. @@ -87,7 +101,7 @@ def check_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_co]: return type(dtype) is cls.dtype_cls @classmethod - def from_dtype(cls: type[Self], dtype: TBaseDType) -> Self: + def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: """ Wrap a dtype object. @@ -106,15 +120,15 @@ def from_dtype(cls: type[Self], dtype: TBaseDType) -> Self: TypeError If the dtype does not match the dtype_cls class attribute. """ - if cls.check_dtype(dtype): - return cls._from_dtype_unsafe(dtype) + if cls.check_native_dtype(dtype): + return cls._from_native_dtype_unsafe(dtype) raise DataTypeValidationError( f"Invalid dtype: {dtype}. Expected an instance of {cls.dtype_cls}." ) @classmethod @abstractmethod - def _from_dtype_unsafe(cls: type[Self], dtype: TBaseDType) -> Self: + def _from_native_dtype_unsafe(cls: type[Self], dtype: TBaseDType) -> Self: """ Wrap a native dtype without checking. @@ -131,7 +145,7 @@ def _from_dtype_unsafe(cls: type[Self], dtype: TBaseDType) -> Self: ... @abstractmethod - def to_dtype(self: Self) -> TDType_co: + def to_native_dtype(self: Self) -> TDType_co: """ Return an instance of the wrapped dtype. @@ -142,10 +156,10 @@ def to_dtype(self: Self) -> TDType_co: """ ... - def cast_value(self, data: object) -> TScalar_co: + def cast_scalar(self, data: object) -> TScalar_co: """ - Cast a value to the wrapped scalar type. The type is first checked for compatibility. 
If it's - incompatible with the associated scalar type, a ``TypeError`` will be raised. + Cast a scalar to the wrapped scalar type. The type is first checked for compatibility. If + it's incompatible with the associated scalar type, a ``TypeError`` will be raised. Parameters ---------- @@ -157,8 +171,8 @@ def cast_value(self, data: object) -> TScalar_co: TScalar The cast value. """ - if self.check_value(data): - return self._cast_value_unsafe(data) + if self.check_scalar(data): + return self._cast_scalar_unchecked(data) msg = ( f"The value {data} failed a type check. " f"It cannot be safely cast to a scalar compatible with {self.dtype_cls}. " @@ -168,9 +182,9 @@ def cast_value(self, data: object) -> TScalar_co: raise TypeError(msg) @abstractmethod - def check_value(self, data: object) -> bool: + def check_scalar(self, data: object) -> bool: """ - Check that a value is a valid value for the wrapped data type. + Check that a scalar is a valid value for the wrapped data type. Parameters ---------- @@ -185,9 +199,9 @@ def check_value(self, data: object) -> bool: ... @abstractmethod - def _cast_value_unsafe(self, data: object) -> TScalar_co: + def _cast_scalar_unchecked(self, data: object) -> TScalar_co: """ - Cast a value to the wrapped data type. This method should not perform any input validation. + Cast a scalar to the wrapped data type. This method should not perform any input validation. Parameters ---------- @@ -202,11 +216,12 @@ def _cast_value_unsafe(self, data: object) -> TScalar_co: ... @abstractmethod - def default_value(self) -> TScalar_co: + def default_scalar(self) -> TScalar_co: """ - Get the default value for the wrapped data type. This is a method, rather than an attribute, + Get the default scalar value for the wrapped data type. This is a method, rather than an attribute, because the default value for some data types may depend on parameters that are not known - until a concrete data type is wrapped. + until a concrete data type is wrapped. 
For example, data types parametrized by a length like + fixed-length strings or bytes will generate scalars consistent with that length. Returns ------- @@ -217,7 +232,35 @@ def default_value(self) -> TScalar_co: @classmethod @abstractmethod - def check_json(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: + def check_json_v2( + cls: type[Self], data: JSON, *, object_codec_id: str | None = None + ) -> TypeGuard[DTypeJSON_V2]: + """ + Check that a JSON representation of a data type is consistent with the ZDType class. + + Parameters + ---------- + data : JSON + The JSON representation of the data type. + + object_codec_id : str | None + The object codec ID, if applicable. Object codecs are specific numcodecs codecs that + zarr-python 2.x used to serialize numpy "Object" scalars. For example, a dtype field set + to "|O" with an object codec ID of "vlen-utf8" indicates that the data type is a + variable-length string. + + Zarr V3 has no such logic, so this parameter is only used for Zarr V2 compatibility. + + Returns + ------- + Bool + True if the JSON representation matches, False otherwise. + """ + ... + + @classmethod + @abstractmethod + def check_json_v3(cls: type[Self], data: JSON) -> TypeGuard[DTypeJSON_V3]: """ Check that a JSON representation of a data type matches the dtype_cls class attribute. Used as a type guard. This base implementation checks that the input is a dictionary, @@ -229,9 +272,6 @@ def check_json(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> TypeGuar data : JSON The JSON representation of the data type. - zarr_format : ZarrFormat - The zarr format version. - Returns ------- Bool @@ -239,8 +279,14 @@ def check_json(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> TypeGuar """ ... + @overload + def to_json(self, zarr_format: Literal[2]) -> DTypeJSON_V2: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> DTypeJSON_V3: ... 
+ @abstractmethod - def to_json(self, zarr_format: ZarrFormat) -> JSON: + def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V2 | DTypeJSON_V3: """ Convert the wrapped data type to a JSON-serializable form. @@ -251,46 +297,73 @@ def to_json(self, zarr_format: ZarrFormat) -> JSON: Returns ------- - JSON + DTypeJSON_V2 | DTypeJSON_V3 The JSON-serializable representation of the wrapped data type """ ... @classmethod - def from_json(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> Self: + def from_json_v3(cls: type[Self], data: JSON) -> Self: """ - Wrap a JSON representation of a data type. + Wrap a Zarr V3 JSON representation of a data type. Parameters ---------- data : JSON The JSON representation of the data type. - zarr_format : ZarrFormat - The zarr format version. - Returns ------- Self The wrapped data type. """ - if cls.check_json(data, zarr_format=zarr_format): - return cls._from_json_unsafe(data, zarr_format=zarr_format) + if cls.check_json_v3(data): + return cls._from_json_unchecked(data, zarr_format=3) raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}: {data}") @classmethod - @abstractmethod - def _from_json_unsafe(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> Self: + def from_json_v2(cls: type[Self], data: JSON, *, object_codec_id: str | None) -> Self: """ - Wrap a JSON representation of a data type. + Wrap a Zarr V2 JSON representation of a data type. Parameters ---------- data : JSON The JSON representation of the data type. - zarr_format : ZarrFormat - The zarr format version. + Returns + ------- + Self + The wrapped data type. 
+ """ + if cls.check_json_v2(data, object_codec_id=object_codec_id): + return cls._from_json_unchecked(data, zarr_format=2) + raise DataTypeValidationError( + f"Invalid JSON representation of data type {cls}: {data!r}, object_codec_id={object_codec_id!r}" + ) + + @classmethod + @overload + def _from_json_unchecked(cls, data: DTypeJSON_V2, *, zarr_format: Literal[2]) -> Self: ... + @classmethod + @overload + def _from_json_unchecked(cls, data: DTypeJSON_V3, *, zarr_format: Literal[3]) -> Self: ... + + @classmethod + @abstractmethod + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: + """ + Create a ZDType instance from a JSON representation of a data type. + + This method should be called after input has been type checked, and so it should not perform + any input validation. + + Parameters + ---------- + data : JSON + The JSON representation of the data type. Returns ------- @@ -300,7 +373,7 @@ def _from_json_unsafe(cls: type[Self], data: JSON, zarr_format: ZarrFormat) -> S ... @abstractmethod - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> JSON: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> JSON: """ Convert a single value to JSON-serializable format. @@ -319,7 +392,7 @@ def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> JSON: ... @abstractmethod - def from_json_value(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar_co: + def from_json_scalar(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar_co: """ Read a JSON-serializable value as a scalar. 
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 23a0275691..ec1ac42264 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -9,7 +9,7 @@ from zarr.abc.metadata import Metadata from zarr.core.chunk_grids import RegularChunkGrid -from zarr.core.dtype import get_data_type_from_native_dtype +from zarr.core.dtype import get_data_type_from_json_v2 from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, TDType_co, TScalar_co, ZDType if TYPE_CHECKING: @@ -45,6 +45,9 @@ class ArrayV2MetadataDict(TypedDict): # Union of acceptable types for v2 compressors CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None +# These are the ids of the known object codecs for zarr v2. +ObjectCodecIds = ("vlen-utf8", "vlen-bytes", "vlen-array", "pickle", "json2", "msgpack2") + @dataclass(frozen=True, kw_only=True) class ArrayV2Metadata(Metadata): @@ -86,7 +89,7 @@ def __init__( filters_parsed = parse_filters(filters) fill_value_parsed: TBaseScalar | None if fill_value is not None: - fill_value_parsed = dtype.cast_value(fill_value) + fill_value_parsed = dtype.cast_scalar(fill_value) else: fill_value_parsed = fill_value attributes_parsed = parse_attributes(attributes) @@ -135,11 +138,29 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: _data = data.copy() # Check that the zarr_format attribute is correct. _ = parse_zarr_format(_data.pop("zarr_format")) - dtype = get_data_type_from_native_dtype(_data["dtype"]) + + # To resolve a numpy object dtype array, we need to search for an object codec, + # which could be in filters or as a compressor. + # we will use a hard-coded list of object codecs for this search. 
+ object_codec_id: str | None = None + maybe_object_codecs = (data.get("filters"), data.get("compressor")) + for maybe_object_codec in maybe_object_codecs: + if isinstance(maybe_object_codec, Sequence): + for codec in maybe_object_codec: + if isinstance(codec, dict) and codec.get("id") in ObjectCodecIds: + object_codec_id = codec["id"] + break + elif ( + isinstance(maybe_object_codec, dict) + and maybe_object_codec.get("id") in ObjectCodecIds + ): + object_codec_id = maybe_object_codec["id"] + break + dtype = get_data_type_from_json_v2(data["dtype"], object_codec_id=object_codec_id) _data["dtype"] = dtype fill_value_encoded = _data.get("fill_value") if fill_value_encoded is not None: - fill_value = dtype.from_json_value(fill_value_encoded, zarr_format=2) + fill_value = dtype.from_json_scalar(fill_value_encoded, zarr_format=2) _data["fill_value"] = fill_value # zarr v2 allowed arbitrary keys here. @@ -192,11 +213,11 @@ def to_dict(self) -> dict[str, JSON]: # serialize the fill value after dtype-specific JSON encoding if self.fill_value is not None: - fill_value = self.dtype.to_json_value(self.fill_value, zarr_format=2) + fill_value = self.dtype.to_json_scalar(self.fill_value, zarr_format=2) zarray_dict["fill_value"] = fill_value # serialize the dtype after fill value-specific JSON encoding - zarray_dict["dtype"] = self.dtype.to_json(zarr_format=2) + zarray_dict["dtype"] = self.dtype.to_json(zarr_format=2) # type: ignore[assignment] return zarray_dict diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 80ed722836..83b9bd7bc8 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -7,7 +7,7 @@ from zarr.core.dtype import ( VariableLengthString, ZDType, - get_data_type_from_json, + get_data_type_from_json_v3, ) if TYPE_CHECKING: @@ -175,7 +175,7 @@ def __init__( chunk_key_encoding_parsed = ChunkKeyEncoding.from_dict(chunk_key_encoding) dimension_names_parsed = parse_dimension_names(dimension_names) # Note: relying on a 
type method is numpy-specific - fill_value_parsed = data_type.cast_value(fill_value) + fill_value_parsed = data_type.cast_scalar(fill_value) attributes_parsed = parse_attributes(attributes) codecs_parsed_partial = parse_codecs(codecs) storage_transformers_parsed = parse_storage_transformers(storage_transformers) @@ -306,12 +306,12 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: _ = parse_node_type_array(_data.pop("node_type")) data_type_json = _data.pop("data_type") - data_type = get_data_type_from_json(data_type_json, zarr_format=3) + data_type = get_data_type_from_json_v3(data_type_json) # check that the fill value is consistent with the data type try: fill = _data.pop("fill_value") - fill_value_parsed = data_type.from_json_value(fill, zarr_format=3) + fill_value_parsed = data_type.from_json_scalar(fill, zarr_format=3) except ValueError as e: raise TypeError(f"Invalid fill_value: {fill!r}") from e @@ -325,7 +325,7 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: def to_dict(self) -> dict[str, JSON]: out_dict = super().to_dict() - out_dict["fill_value"] = self.data_type.to_json_value( + out_dict["fill_value"] = self.data_type.to_json_scalar( self.fill_value, zarr_format=self.zarr_format ) if not isinstance(out_dict, dict): diff --git a/tests/package_with_entrypoint/__init__.py b/tests/package_with_entrypoint/__init__.py index cf8ba4b0bb..834d5654c0 100644 --- a/tests/package_with_entrypoint/__init__.py +++ b/tests/package_with_entrypoint/__init__.py @@ -9,7 +9,7 @@ from zarr.codecs import BytesCodec from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, NDBuffer -from zarr.core.common import ZarrFormat +from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype.npy.bool import Bool @@ -74,13 +74,13 @@ class TestDataType(Bool): This is a "data type" that serializes to "test" """ - _zarr_v3_name = "test" + _zarr_v3_name = "test" # type: ignore[assignment] @classmethod - def from_json(cls, data: Any, zarr_format: Literal[2, 
3]) -> Self: - if data == cls._zarr_v3_name: + def from_json(cls, data: JSON, zarr_format: Literal[2, 3]) -> Self: + if data == cls._zarr_v3_name: # type: ignore[has-type] return cls() raise ValueError - def to_json(self, zarr_format: ZarrFormat) -> str: - return self._zarr_v3_name + def to_json(self, zarr_format: ZarrFormat) -> str: # type: ignore[override] + return self._zarr_v3_name # type: ignore[no-any-return, has-type] diff --git a/tests/test_array.py b/tests/test_array.py index 997470a0d3..7c500fe32b 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -163,7 +163,7 @@ def test_array_name_properties_no_group( store: LocalStore | MemoryStore, zarr_format: ZarrFormat ) -> None: arr = zarr.create_array( - store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4" + store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype=">i4" ) assert arr.path == "" assert arr.name == "/" @@ -214,7 +214,7 @@ def test_array_fill_value_default( ) else: arr = zarr.create_array(store=store, shape=shape, dtype=zdtype, zarr_format=3, chunks=shape) - expected_fill_value = zdtype.default_value() + expected_fill_value = zdtype.default_scalar() if isinstance(expected_fill_value, np.datetime64 | np.timedelta64): if np.isnat(expected_fill_value): assert np.isnat(arr.fill_value) @@ -370,7 +370,7 @@ def test_storage_transformers(store: MemoryStore, zarr_format: ZarrFormat | str) "zarr_format": zarr_format, "shape": (10,), "chunks": (1,), - "dtype": "uint8", + "dtype": "|u1", "dimension_separator": ".", "codecs": (BytesCodec().to_dict(),), "fill_value": 0, @@ -1008,9 +1008,9 @@ def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None: """ a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype) if isinstance(dtype, DateTime64 | TimeDelta64) and np.isnat(a.fill_value): - assert np.isnat(dtype.default_value()) + assert np.isnat(dtype.default_scalar()) else: - assert a.fill_value == dtype.default_value() + assert a.fill_value 
== dtype.default_scalar() @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @@ -1029,7 +1029,7 @@ def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFor name="b", shape=(5,), chunks=(5,), - dtype=dtype.to_dtype(), + dtype=dtype.to_native_dtype(), zarr_format=zarr_format, ) assert a.dtype == b.dtype @@ -1044,7 +1044,7 @@ def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFor name="c", shape=(5,), chunks=(5,), - dtype=dtype.to_dtype().char, + dtype=dtype.to_native_dtype().char, zarr_format=zarr_format, ) else: @@ -1053,7 +1053,7 @@ def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFor name="c", shape=(5,), chunks=(5,), - dtype=dtype.to_dtype().str, + dtype=dtype.to_native_dtype().str, zarr_format=zarr_format, ) assert a.dtype == c.dtype @@ -1308,7 +1308,7 @@ async def test_default_filters_compressors( arr = await create_array( store=store, - dtype=dtype, + dtype=dtype, # type: ignore[arg-type] shape=(10,), zarr_format=zarr_format, ) @@ -1320,14 +1320,14 @@ async def test_default_filters_compressors( compressors=sig.parameters["compressors"].default, filters=sig.parameters["filters"].default, serializer=sig.parameters["serializer"].default, - dtype=dtype, + dtype=dtype, # type: ignore[arg-type] ) elif zarr_format == 2: default_filters, default_compressors = _parse_chunk_encoding_v2( compressor=sig.parameters["compressors"].default, filters=sig.parameters["filters"].default, - dtype=dtype, + dtype=dtype, # type: ignore[arg-type] ) if default_filters is None: expected_filters = () diff --git a/tests/test_dtype/conftest.py b/tests/test_dtype/conftest.py index 2b21a57365..b2aa89afd7 100644 --- a/tests/test_dtype/conftest.py +++ b/tests/test_dtype/conftest.py @@ -17,7 +17,7 @@ with warnings.catch_warnings(): warnings.simplefilter("ignore") zdtype_examples += ( - wrapper_cls.from_dtype(np.dtype([("a", np.float64), ("b", np.int8)])), + 
wrapper_cls.from_native_dtype(np.dtype([("a", np.float64), ("b", np.int8)])), ) elif issubclass(wrapper_cls, HasLength): zdtype_examples += (wrapper_cls(length=1),) diff --git a/tests/test_dtype/test_npy/test_bool.py b/tests/test_dtype/test_npy/test_bool.py index 1adae57f02..03dc550a9d 100644 --- a/tests/test_dtype/test_npy/test_bool.py +++ b/tests/test_dtype/test_npy/test_bool.py @@ -2,11 +2,11 @@ import numpy as np -from tests.test_dtype.test_wrapper import _TestZDType +from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams from zarr.core.dtype.npy.bool import Bool -class TestBool(_TestZDType): +class TestBool(BaseTestZDType): test_cls = Bool valid_dtype = (np.dtype(np.bool_),) @@ -15,7 +15,7 @@ class TestBool(_TestZDType): np.dtype(np.float64), np.dtype(np.uint16), ) - valid_json_v2 = ("|b1",) + valid_json_v2 = (V2JsonTestParams(dtype="|b1"),) valid_json_v3 = ("bool",) invalid_json_v2 = ( "|b1", diff --git a/tests/test_dtype/test_npy/test_common.py b/tests/test_dtype/test_npy/test_common.py index 258ab48fe1..c4a82e22b0 100644 --- a/tests/test_dtype/test_npy/test_common.py +++ b/tests/test_dtype/test_npy/test_common.py @@ -45,7 +45,7 @@ def nan_equal(a: object, b: object) -> bool: return a == b -json_float_v2_cases: list[tuple[JSONFloatV2, float | np.floating[Any]]] = [ +json_float_v2_roundtrip_cases: tuple[tuple[JSONFloatV2, float | np.floating[Any]], ...] 
= ( ("Infinity", float("inf")), ("Infinity", np.inf), ("-Infinity", float("-inf")), @@ -53,11 +53,9 @@ def nan_equal(a: object, b: object) -> bool: ("NaN", float("nan")), ("NaN", np.nan), (1.0, 1.0), -] +) -# exactly the same as v2, for now, until we get support for the special NaN encoding defined in the -# v3 spec -json_float_v3_cases = json_float_v2_cases +json_float_v3_cases = json_float_v2_roundtrip_cases @pytest.mark.parametrize( @@ -94,13 +92,15 @@ def test_endianness_to_numpy_str(data: str | None, expected: str) -> None: endianness_to_numpy_str(data) # type: ignore[arg-type] -@pytest.mark.parametrize(("data", "expected"), json_float_v2_cases + [("SHOULD_ERR", "")]) +@pytest.mark.parametrize( + ("data", "expected"), json_float_v2_roundtrip_cases + (("SHOULD_ERR", ""),) +) def test_float_from_json_v2(data: JSONFloatV2 | str, expected: float | str) -> None: """ Test that float_from_json_v2 correctly converts a JSON string representation of a float to a float. This test also checks that an invalid string input raises a ``ValueError`` """ - if data in get_args(SpecialFloatStrings) or isinstance(data, float): + if data != "SHOULD_ERR": assert nan_equal(float_from_json_v2(data), expected) # type: ignore[arg-type] else: msg = f"could not convert string to float: {data!r}" @@ -108,25 +108,35 @@ def test_float_from_json_v2(data: JSONFloatV2 | str, expected: float | str) -> N float_from_json_v2(data) # type: ignore[arg-type] -@pytest.mark.parametrize(("data", "expected"), json_float_v3_cases + [("SHOULD_ERR", "")]) +@pytest.mark.parametrize( + ("data", "expected"), json_float_v3_cases + (("SHOULD_ERR", ""), ("0x", "")) +) def test_float_from_json_v3(data: JSONFloatV2 | str, expected: float | str) -> None: """ Test that float_from_json_v3 correctly converts a JSON string representation of a float to a float. 
This test also checks that an invalid string input raises a ``ValueError`` """ - if data in get_args(SpecialFloatStrings) or isinstance(data, float): - assert nan_equal(float_from_json_v3(data), expected) - else: + if data == "SHOULD_ERR": msg = ( f"Invalid float value: {data!r}. Expected a string starting with the hex prefix" " '0x', or one of 'NaN', 'Infinity', or '-Infinity'." ) with pytest.raises(ValueError, match=msg): float_from_json_v3(data) + elif data == "0x": + msg = ( + f"Invalid hexadecimal float value: {data!r}. " + "Expected the '0x' prefix to be followed by 4, 8, or 16 numeral characters" + ) + + with pytest.raises(ValueError, match=msg): + float_from_json_v3(data) + else: + assert nan_equal(float_from_json_v3(data), expected) # note the order of parameters relative to the order of the parametrized variable. -@pytest.mark.parametrize(("expected", "data"), json_float_v2_cases) +@pytest.mark.parametrize(("expected", "data"), json_float_v2_roundtrip_cases) def test_float_to_json_v2(data: float | np.floating[Any], expected: JSONFloatV2) -> None: """ Test that floats are JSON-encoded properly for zarr v2 @@ -170,7 +180,7 @@ def test_bytes_to_json(zarr_format: ZarrFormat) -> None: # note the order of parameters relative to the order of the parametrized variable. 
-@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v2_cases) +@pytest.mark.parametrize(("json_expected", "float_data"), json_float_v2_roundtrip_cases) def test_complex_to_json_v2( float_data: float | np.floating[Any], json_expected: JSONFloatV2 ) -> None: diff --git a/tests/test_dtype/test_npy/test_complex.py b/tests/test_dtype/test_npy/test_complex.py index 45a3a1480e..fd216d8415 100644 --- a/tests/test_dtype/test_npy/test_complex.py +++ b/tests/test_dtype/test_npy/test_complex.py @@ -4,11 +4,11 @@ import numpy as np -from tests.test_dtype.test_wrapper import _TestZDType +from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams from zarr.core.dtype.npy.complex import Complex64, Complex128 -class _BaseTestFloat(_TestZDType): +class _BaseTestFloat(BaseTestZDType): def scalar_equals(self, scalar1: object, scalar2: object) -> bool: if np.isnan(scalar1) and np.isnan(scalar2): # type: ignore[call-overload] return True @@ -23,7 +23,7 @@ class TestComplex64(_BaseTestFloat): np.dtype(np.float64), np.dtype(np.complex128), ) - valid_json_v2 = (">c8", "c8"), V2JsonTestParams(dtype="c16", "c16"), V2JsonTestParams(dtype=" bool: if np.isnan(scalar1) and np.isnan(scalar2): # type: ignore[call-overload] return True @@ -20,7 +20,7 @@ def test_hex_encoding(self, hex_string_params: tuple[str, float]) -> None: """ hex_string, expected = hex_string_params zdtype = self.test_cls() - observed = zdtype.from_json_value(hex_string, zarr_format=3) + observed = zdtype.from_json_scalar(hex_string, zarr_format=3) assert self.scalar_equals(observed, expected) @@ -32,8 +32,8 @@ class TestFloat16(_BaseTestFloat): np.dtype(np.uint16), np.dtype(np.float32), ) - valid_json_v2 = Float16._zarr_v2_names - valid_json_v3 = (Float16._zarr_v3_name,) + valid_json_v2 = (V2JsonTestParams(dtype=">f2"), V2JsonTestParams(dtype="f4"), V2JsonTestParams(dtype="f8"), V2JsonTestParams(dtype="i1", @@ -37,7 +37,7 @@ class TestInt8(_TestZDType): item_size_params = (Int8(),) -class 
TestInt16(_TestZDType): +class TestInt16(BaseTestZDType): test_cls = Int16 scalar_type = np.int16 valid_dtype = (np.dtype(">i2"), np.dtype("i2", "i2"), V2JsonTestParams(dtype="i4"), np.dtype("i4", "i4"), V2JsonTestParams(dtype="i8"), np.dtype("i8", "i8"), V2JsonTestParams(dtype="u2"), np.dtype("u2", "u2"), V2JsonTestParams(dtype="u4"), np.dtype("u4", "u4"), V2JsonTestParams(dtype="u8"), np.dtype("u8", "u8"), V2JsonTestParams(dtype="i4"), ("field2", ">f8")], - [("field1", ">i8"), ("field2", ">i4")], + V2JsonTestParams(dtype=[("field1", ">i4"), ("field2", ">f8")]), + V2JsonTestParams(dtype=[("field1", ">i8"), ("field2", ">i4")]), ) valid_json_v3 = ( { @@ -99,7 +99,7 @@ class TestStructured(_TestZDType): ), ( "field2", - {"name": "numpy.fixed_length_utf32", "configuration": {"length_bytes": 32}}, + {"name": "fixed_length_utf32", "configuration": {"length_bytes": 32}}, ), ] }, diff --git a/tests/test_dtype/test_npy/test_string.py b/tests/test_dtype/test_npy/test_string.py index 6620f45052..73c8612db4 100644 --- a/tests/test_dtype/test_npy/test_string.py +++ b/tests/test_dtype/test_npy/test_string.py @@ -2,13 +2,13 @@ import numpy as np -from tests.test_dtype.test_wrapper import _TestZDType +from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams from zarr.core.dtype import FixedLengthASCII, FixedLengthUTF32 from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING, VariableLengthString if _NUMPY_SUPPORTS_VLEN_STRING: - class TestVariableLengthString(_TestZDType): + class TestVariableLengthString(BaseTestZDType): test_cls = VariableLengthString # type: ignore[assignment] valid_dtype = (np.dtypes.StringDType(),) # type: ignore[assignment] invalid_dtype = ( @@ -16,15 +16,15 @@ class TestVariableLengthString(_TestZDType): np.dtype(np.float64), np.dtype("|S10"), ) - valid_json_v2 = ("|O",) - valid_json_v3 = ("numpy.variable_length_utf8",) + valid_json_v2 = (V2JsonTestParams(dtype="|O", object_codec_id="vlen-utf8"),) + valid_json_v3 = 
("variable_length_utf8",) invalid_json_v2 = ( "|S10", "|f8", "invalid", ) invalid_json_v3 = ( - {"name": "numpy.variable_length_utf8", "configuration": {"invalid_key": "value"}}, + {"name": "variable_length_utf8", "configuration": {"invalid_key": "value"}}, {"name": "invalid_name"}, ) @@ -42,7 +42,7 @@ class TestVariableLengthString(_TestZDType): else: - class TestVariableLengthString(_TestZDType): # type: ignore[no-redef] + class TestVariableLengthString(BaseTestZDType): # type: ignore[no-redef] test_cls = VariableLengthString # type: ignore[assignment] valid_dtype = (np.dtype("O"),) invalid_dtype = ( @@ -50,8 +50,8 @@ class TestVariableLengthString(_TestZDType): # type: ignore[no-redef] np.dtype(np.float64), np.dtype("|S10"), ) - valid_json_v2 = ("|O",) - valid_json_v3 = ("numpy.variable_length_utf8",) + valid_json_v2 = (V2JsonTestParams(dtype="|O", object_codec_id="vlen-utf8"),) + valid_json_v3 = ("variable_length_utf8",) invalid_json_v2 = ( "|S10", "|f8", @@ -76,7 +76,7 @@ class TestVariableLengthString(_TestZDType): # type: ignore[no-redef] item_size_params = (VariableLengthString(),) -class TestFixedLengthAscii(_TestZDType): +class TestFixedLengthAscii(BaseTestZDType): test_cls = FixedLengthASCII valid_dtype = (np.dtype("|S10"), np.dtype("|S4")) invalid_dtype = ( @@ -84,15 +84,19 @@ class TestFixedLengthAscii(_TestZDType): np.dtype(np.float64), np.dtype("|U10"), ) - valid_json_v2 = ("|S0", "|S2", "|S4") - valid_json_v3 = ({"name": "numpy.fixed_length_ascii", "configuration": {"length_bytes": 10}},) + valid_json_v2 = ( + V2JsonTestParams(dtype="|S0"), + V2JsonTestParams(dtype="|S2"), + V2JsonTestParams(dtype="|S4"), + ) + valid_json_v3 = ({"name": "fixed_length_ascii", "configuration": {"length_bytes": 10}},) invalid_json_v2 = ( "|S", "|U10", "|f8", ) invalid_json_v3 = ( - {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": 0}}, + {"name": "fixed_length_ascii", "configuration": {"length_bits": 0}}, {"name": "numpy.fixed_length_ascii", 
"configuration": {"length_bits": "invalid"}}, ) @@ -118,7 +122,7 @@ class TestFixedLengthAscii(_TestZDType): ) -class TestFixedLengthUTF32(_TestZDType): +class TestFixedLengthUTF32(BaseTestZDType): test_cls = FixedLengthUTF32 valid_dtype = (np.dtype(">U10"), np.dtype("U10", "U10"), V2JsonTestParams(dtype=" bool: # This method gets overridden here to support the equivalency between NaT and # -9223372036854775808 fill values @@ -34,7 +34,12 @@ class TestDateTime64(_TestTimeBase): np.dtype(np.float64), np.dtype("timedelta64[ns]"), ) - valid_json_v2 = (">M8", ">M8[s]", " None: """ -class _TestZDType: +@dataclass(frozen=True, kw_only=True, slots=True) +class V2JsonTestParams: + dtype: str | dict[str, object] | list[object] + object_codec_id: str | None = None + + +class BaseTestZDType: """ A base class for testing ZDType subclasses. This class works in conjunction with the custom pytest collection function ``pytest_generate_tests`` defined in conftest.py, which applies the @@ -66,7 +73,7 @@ class _TestZDType: valid_dtype: ClassVar[tuple[TBaseDType, ...]] = () invalid_dtype: ClassVar[tuple[TBaseDType, ...]] = () - valid_json_v2: ClassVar[tuple[str | dict[str, object] | list[object], ...]] = () + valid_json_v2: ClassVar[tuple[V2JsonTestParams, ...]] = () invalid_json_v2: ClassVar[tuple[str | dict[str, object] | list[object], ...]] = () valid_json_v3: ClassVar[tuple[str | dict[str, object], ...]] = () @@ -92,37 +99,40 @@ def scalar_equals(self, scalar1: object, scalar2: object) -> bool: return scalar1 == scalar2 def test_check_dtype_valid(self, valid_dtype: TBaseDType) -> None: - assert self.test_cls.check_dtype(valid_dtype) + assert self.test_cls.check_native_dtype(valid_dtype) def test_check_dtype_invalid(self, invalid_dtype: object) -> None: - assert not self.test_cls.check_dtype(invalid_dtype) # type: ignore[arg-type] + assert not self.test_cls.check_native_dtype(invalid_dtype) # type: ignore[arg-type] def test_from_dtype_roundtrip(self, valid_dtype: Any) -> None: - 
zdtype = self.test_cls.from_dtype(valid_dtype) - assert zdtype.to_dtype() == valid_dtype + zdtype = self.test_cls.from_native_dtype(valid_dtype) + assert zdtype.to_native_dtype() == valid_dtype - def test_from_json_roundtrip_v2(self, valid_json_v2: Any) -> None: - zdtype = self.test_cls.from_json(valid_json_v2, zarr_format=2) - assert zdtype.to_json(zarr_format=2) == valid_json_v2 + def test_from_json_roundtrip_v2(self, valid_json_v2: V2JsonTestParams) -> None: + zdtype = self.test_cls.from_json_v2( + valid_json_v2.dtype, # type: ignore[arg-type] + object_codec_id=valid_json_v2.object_codec_id, + ) + assert zdtype.to_json(zarr_format=2) == valid_json_v2.dtype @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_from_json_roundtrip_v3(self, valid_json_v3: Any) -> None: - zdtype = self.test_cls.from_json(valid_json_v3, zarr_format=3) + zdtype = self.test_cls.from_json_v3(valid_json_v3) assert zdtype.to_json(zarr_format=3) == valid_json_v3 - def test_scalar_roundtrip_v2(self, scalar_v2_params: tuple[Any, Any]) -> None: + def test_scalar_roundtrip_v2(self, scalar_v2_params: tuple[ZDType[Any, Any], Any]) -> None: zdtype, scalar_json = scalar_v2_params - scalar = zdtype.from_json_value(scalar_json, zarr_format=2) - assert self.json_scalar_equals(scalar_json, zdtype.to_json_value(scalar, zarr_format=2)) + scalar = zdtype.from_json_scalar(scalar_json, zarr_format=2) + assert self.json_scalar_equals(scalar_json, zdtype.to_json_scalar(scalar, zarr_format=2)) - def test_scalar_roundtrip_v3(self, scalar_v3_params: tuple[Any, Any]) -> None: + def test_scalar_roundtrip_v3(self, scalar_v3_params: tuple[ZDType[Any, Any], Any]) -> None: zdtype, scalar_json = scalar_v3_params - scalar = zdtype.from_json_value(scalar_json, zarr_format=3) - assert self.json_scalar_equals(scalar_json, zdtype.to_json_value(scalar, zarr_format=3)) + scalar = zdtype.from_json_scalar(scalar_json, zarr_format=3) + assert self.json_scalar_equals(scalar_json, 
zdtype.to_json_scalar(scalar, zarr_format=3)) - def test_cast_value(self, cast_value_params: tuple[Any, Any, Any]) -> None: + def test_cast_value(self, cast_value_params: tuple[ZDType[Any, Any], Any, Any]) -> None: zdtype, value, expected = cast_value_params - observed = zdtype.cast_value(value) + observed = zdtype.cast_scalar(value) assert self.scalar_equals(expected, observed) def test_item_size(self, item_size_params: ZDType[Any, Any]) -> None: @@ -131,6 +141,6 @@ def test_item_size(self, item_size_params: ZDType[Any, Any]) -> None: with a fixed scalar size. """ if isinstance(item_size_params, HasItemSize): - assert item_size_params.item_size == item_size_params.to_dtype().itemsize + assert item_size_params.item_size == item_size_params.to_native_dtype().itemsize else: pytest.skip(f"Dtype {item_size_params} does not implement HasItemSize") diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index 0c650e5c29..c4225874a4 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -23,9 +23,10 @@ TBaseScalar, ZDType, data_type_registry, - get_data_type_from_json, + get_data_type_from_json_v3, parse_data_type, ) +from zarr.core.dtype.common import HasObjectCodec if TYPE_CHECKING: from collections.abc import Generator @@ -58,7 +59,7 @@ def test_override(data_type_registry_fixture: DataTypeRegistry) -> None: data_type_registry_fixture.register(Bool._zarr_v3_name, Bool) class NewBool(Bool): - def default_value(self) -> np.bool_: + def default_scalar(self) -> np.bool_: return np.True_ data_type_registry_fixture.register(NewBool._zarr_v3_name, NewBool) @@ -96,20 +97,36 @@ def test_unregistered_dtype(data_type_registry_fixture: DataTypeRegistry) -> Non @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("zdtype", zdtype_examples) - def test_registered_dtypes( - zdtype: ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat - ) -> None: + def 
test_registered_dtypes_match_dtype(zdtype: ZDType[TBaseDType, TBaseScalar]) -> None: """ Test that the registered dtypes can be retrieved from the registry. """ skip_object_dtype(zdtype) - assert data_type_registry.match_dtype(zdtype.to_dtype()) == zdtype - assert ( - data_type_registry.match_json( - zdtype.to_json(zarr_format=zarr_format), zarr_format=zarr_format + assert data_type_registry.match_dtype(zdtype.to_native_dtype()) == zdtype + + @staticmethod + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") + @pytest.mark.parametrize("zdtype", zdtype_examples) + def test_registered_dtypes_match_json( + zdtype: ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat + ) -> None: + if zarr_format == 2: + if isinstance(zdtype, HasObjectCodec): + object_codec_id = zdtype.object_codec_id + else: + object_codec_id = None + assert ( + data_type_registry.match_json_v2( + zdtype.to_json(zarr_format=zarr_format), # type: ignore[arg-type] + object_codec_id=object_codec_id, + ) + == zdtype + ) + else: + skip_object_dtype(zdtype) + assert ( + data_type_registry.match_json_v3(zdtype.to_json(zarr_format=zarr_format)) == zdtype # type: ignore[arg-type] ) - == zdtype - ) @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @@ -129,7 +146,7 @@ def test_match_dtype_unique( if _cls is not type(zdtype): data_type_registry_fixture.register(_cls._zarr_v3_name, _cls) - dtype_instance = zdtype.to_dtype() + dtype_instance = zdtype.to_native_dtype() msg = f"No data type wrapper found that matches dtype '{dtype_instance}'" with pytest.raises(ValueError, match=re.escape(msg)): @@ -138,7 +155,7 @@ def test_match_dtype_unique( instance_dict = zdtype.to_json(zarr_format=zarr_format) msg = f"No data type wrapper found that matches {instance_dict}" with pytest.raises(ValueError, match=re.escape(msg)): - data_type_registry_fixture.match_json(instance_dict, zarr_format=zarr_format) + 
data_type_registry_fixture.match_json_v3(instance_dict) # type: ignore[arg-type] # this is copied from the registry tests -- we should deduplicate @@ -161,9 +178,11 @@ def set_path() -> Generator[None, None, None]: def test_entrypoint_dtype(zarr_format: ZarrFormat) -> None: from package_with_entrypoint import TestDataType + data_type_registry.lazy_load() instance = TestDataType() dtype_json = instance.to_json(zarr_format=zarr_format) - assert get_data_type_from_json(dtype_json, zarr_format=zarr_format) == instance + assert get_data_type_from_json_v3(dtype_json) == instance + data_type_registry.unregister(TestDataType._zarr_v3_name) @pytest.mark.parametrize( diff --git a/tests/test_group.py b/tests/test_group.py index b4dace2568..c0a40bf5f9 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -23,6 +23,7 @@ from zarr.core._info import GroupInfo from zarr.core.buffer import default_buffer_prototype from zarr.core.config import config as zarr_config +from zarr.core.dtype.npy.int import UInt8 from zarr.core.group import ( ConsolidatedMetadata, GroupMetadata, @@ -494,7 +495,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat expected_groups = list(zip(expected_group_keys, expected_group_values, strict=False)) fill_value = 3 - dtype = "uint8" + dtype = UInt8() expected_group_values[0].create_group("subgroup") expected_group_values[0].create_array( @@ -515,7 +516,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat metadata = { "subarray": { "attributes": {}, - "dtype": dtype, + "dtype": dtype.to_json(zarr_format=zarr_format), "fill_value": fill_value, "shape": (1,), "chunks": (1,), @@ -551,7 +552,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat {"configuration": {"endian": "little"}, "name": "bytes"}, {"configuration": {}, "name": "zstd"}, ), - "data_type": dtype, + "data_type": dtype.to_json(zarr_format=zarr_format), "fill_value": fill_value, "node_type": 
"array", "shape": (1,), diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py index b2244c5047..cfb548cc8d 100644 --- a/tests/test_metadata/test_consolidated.py +++ b/tests/test_metadata/test_consolidated.py @@ -594,7 +594,7 @@ async def test_consolidated_metadata_encodes_special_chars( "consolidated_metadata" ]["metadata"] - expected_fill_value = _time._zdtype.to_json_value(fill_value, zarr_format=2) + expected_fill_value = _time._zdtype.to_json_scalar(fill_value, zarr_format=2) if zarr_format == 2: assert root_metadata["time/.zarray"]["fill_value"] == expected_fill_value diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py index 5fd3ae8cc6..a2894529aa 100644 --- a/tests/test_metadata/test_v2.py +++ b/tests/test_metadata/test_v2.py @@ -86,7 +86,7 @@ def test_filters_empty_tuple_warns() -> None: "zarr_format": 2, "shape": (1,), "chunks": (1,), - "dtype": "uint8", + "dtype": "|u1", "order": "C", "compressor": None, "filters": (), diff --git a/tests/test_metadata/test_v3.py b/tests/test_metadata/test_v3.py index f3bd4510e5..a806a438c7 100644 --- a/tests/test_metadata/test_v3.py +++ b/tests/test_metadata/test_v3.py @@ -128,10 +128,10 @@ def test_jsonify_fill_value_complex(fill_value: Any, dtype_str: str) -> None: """ zarr_format = 3 dtype = get_data_type_from_native_dtype(dtype_str) - expected = dtype.to_dtype().type(complex(*fill_value)) - observed = dtype.from_json_value(fill_value, zarr_format=zarr_format) + expected = dtype.to_native_dtype().type(complex(*fill_value)) + observed = dtype.from_json_scalar(fill_value, zarr_format=zarr_format) assert observed == expected - assert dtype.to_json_value(observed, zarr_format=zarr_format) == tuple(fill_value) + assert dtype.to_json_scalar(observed, zarr_format=zarr_format) == tuple(fill_value) @pytest.mark.parametrize("fill_value", [{"foo": 10}]) @@ -143,7 +143,7 @@ def test_parse_fill_value_invalid_type(fill_value: Any, dtype_str: str) -> None: """ 
dtype_instance = get_data_type_from_native_dtype(dtype_str) with pytest.raises(TypeError, match=f"Invalid type: {fill_value}"): - dtype_instance.from_json_value(fill_value, zarr_format=3) + dtype_instance.from_json_scalar(fill_value, zarr_format=3) @pytest.mark.parametrize( @@ -164,7 +164,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str) """ dtype_instance = get_data_type_from_native_dtype(dtype_str) with pytest.raises(TypeError, match=re.escape(f"Invalid type: {fill_value}")): - dtype_instance.from_json_value(fill_value, zarr_format=3) + dtype_instance.from_json_scalar(fill_value, zarr_format=3) @pytest.mark.parametrize("chunk_grid", ["regular"]) @@ -266,8 +266,8 @@ async def test_datetime_metadata(fill_value: int, precision: str) -> None: "data_type": dtype.to_json(zarr_format=3), "chunk_key_encoding": {"name": "default", "separator": "."}, "codecs": (BytesCodec(),), - "fill_value": dtype.to_json_value( - dtype.to_dtype().type(fill_value, dtype.unit), zarr_format=3 + "fill_value": dtype.to_json_scalar( + dtype.to_native_dtype().type(fill_value, dtype.unit), zarr_format=3 ), } metadata = ArrayV3Metadata.from_dict(metadata_dict) diff --git a/tests/test_properties.py b/tests/test_properties.py index 15dd701582..ed8aa997c0 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -316,7 +316,7 @@ def test_array_metadata_meets_spec(meta: ArrayV2Metadata | ArrayV3Metadata) -> N assert asdict_dict["zarr_format"] == 3 # version-agnostic validations - dtype_native = meta.dtype.to_dtype() + dtype_native = meta.dtype.to_native_dtype() if dtype_native.kind == "f": assert serialized_float_is_valid(asdict_dict["fill_value"]) elif dtype_native.kind == "c": diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py index 61ff8ebfa9..a5b77d9931 100644 --- a/tests/test_regression/test_regression.py +++ b/tests/test_regression/test_regression.py @@ -34,6 +34,7 @@ def runner_installed() -> bool: 
class ArrayParams: values: np.ndarray[tuple[int], np.dtype[np.generic]] fill_value: np.generic | str | int + filters: tuple[numcodecs.abc.Codec, ...] = () compressor: numcodecs.abc.Codec @@ -62,7 +63,8 @@ class ArrayParams: ArrayParams( values=np.array(["a", "bb", "ccc", "dddd"], dtype="O"), fill_value="1", - compressor=VLenUTF8(), + filters=(VLenUTF8(),), + compressor=GZip(), ) ] array_cases = basic_array_cases + datetime_array_cases + string_array_cases + vlen_string_cases @@ -86,9 +88,9 @@ def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: dtype=dtype, chunks=array_params.values.shape, compressors=compressor, + filters=array_params.filters, fill_value=array_params.fill_value, order="C", - filters=None, chunk_key_encoding=chunk_key_encoding, write_data=True, zarr_format=2, diff --git a/tests/test_v2.py b/tests/test_v2.py index 4b041a9b82..fa2aa65b22 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -62,8 +62,8 @@ def test_codec_pipeline() -> None: @pytest.mark.parametrize( ("dtype", "expected_dtype", "fill_value", "fill_value_json"), [ - ("|S", "|S0", b"X", "WA=="), - ("|V", "|V0", b"X", "WA=="), + ("|S1", "|S1", b"X", "WA=="), + ("|V1", "|V1", b"X", "WA=="), ("|V10", "|V10", b"X", "WAAAAAAAAAAAAA=="), ], ) @@ -111,7 +111,7 @@ async def test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_js ], ) def test_v2_encode_decode_with_data(dtype: ZDType[Any, Any], value: str): - expected = np.full((3,), value, dtype=dtype.to_dtype()) + expected = np.full((3,), value, dtype=dtype.to_native_dtype()) a = zarr.create( shape=(3,), zarr_format=2, @@ -278,8 +278,8 @@ def test_structured_dtype_roundtrip(fill_value, tmp_path) -> None: def test_parse_structured_fill_value_valid( fill_value: Any, dtype: np.dtype[Any], expected_result: Any ) -> None: - zdtype = Structured.from_dtype(dtype) - result = zdtype.cast_value(fill_value) + zdtype = Structured.from_native_dtype(dtype) + result = zdtype.cast_scalar(fill_value) assert result.dtype 
== expected_result.dtype assert result == expected_result if isinstance(expected_result, np.void): From d6535d65facbe6be67cd9f0ed195d5e8656ffc07 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 29 May 2025 12:55:58 +0200 Subject: [PATCH 116/130] fix storage info discrepancy in docs --- docs/user-guide/arrays.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/arrays.rst b/docs/user-guide/arrays.rst index 13190a4689..c27f1296b9 100644 --- a/docs/user-guide/arrays.rst +++ b/docs/user-guide/arrays.rst @@ -211,8 +211,8 @@ prints additional diagnostics, e.g.:: Serializer : BytesCodec(endian=) Compressors : (BloscCodec(typesize=4, cname=, clevel=3, shuffle=, blocksize=0),) No. bytes : 400000000 (381.5M) - No. bytes stored : 9696520 - Storage ratio : 41.3 + No. bytes stored : 3558573 + Storage ratio : 112.4 Chunks Initialized : 100 .. note:: From 42e14ef8856b13e863cfa538d6f4a2d08d61d10d Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 29 May 2025 17:29:05 +0200 Subject: [PATCH 117/130] fix docstring that was troubling sphinx --- src/zarr/core/dtype/wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index c9b23707e8..f3d6b0adca 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -246,7 +246,7 @@ def check_json_v2( object_codec_id : str | None The object codec ID, if applicable. Object codecs are specific numcodecs codecs that zarr-python 2.x used to serialize numpy "Object" scalars. For example, a dtype field set - to "|O" with an object codec ID of "vlen-utf8" indicates that the data type is a + to ``"|O"`` with an object codec ID of "vlen-utf8" indicates that the data type is a variable-length string. Zarr V3 has no such logic, so this parameter is only used for Zarr V2 compatibility. 
From 3991406bcdac1cc76da01352a3abb54c1796e02e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 29 May 2025 21:01:14 +0200 Subject: [PATCH 118/130] wip: add vlen-bytes --- src/zarr/core/dtype/npy/vlen_bytes.py | 75 ++++++++++++++++++++++++ tests/test_regression/test_regression.py | 18 +++++- 2 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 src/zarr/core/dtype/npy/vlen_bytes.py diff --git a/src/zarr/core/dtype/npy/vlen_bytes.py b/src/zarr/core/dtype/npy/vlen_bytes.py new file mode 100644 index 0000000000..6d804cac60 --- /dev/null +++ b/src/zarr/core/dtype/npy/vlen_bytes.py @@ -0,0 +1,75 @@ +from dataclasses import dataclass +from typing import ClassVar, Literal, Self, TypeGuard, overload + +import numpy as np + +from zarr.core.common import JSON, ZarrFormat +from zarr.core.dtype.common import HasObjectCodec +from zarr.core.dtype.wrapper import TBaseDType, ZDType + + +@dataclass(frozen=True, kw_only=True) +class VariableLengthString(ZDType[np.dtypes.ObjectDType, str], HasObjectCodec): # type: ignore[no-redef] + dtype_cls = np.dtypes.ObjectDType + _zarr_v3_name: ClassVar[Literal["variable_length_bytes"]] = "variable_length_bytes" + object_codec_id = "vlen-bytes" + + @classmethod + def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + return cls() + + def to_native_dtype(self) -> np.dtypes.ObjectDType: + return self.dtype_cls() + + @classmethod + def check_json_v2( + cls, data: JSON, *, object_codec_id: str | None = None + ) -> TypeGuard[Literal["|O"]]: + """ + Check that the input is a valid JSON representation of a numpy O dtype, and that the + object codec id is appropriate for variable-length UTF-8 strings. + """ + return data == "|O" and object_codec_id == cls.object_codec_id + + @classmethod + def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: + return data == cls._zarr_v3_name + + @overload + def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... 
+ + @overload + def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_utf8"]: ... + + def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf8"]: + if zarr_format == 2: + return "|O" + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: + return cls() + + def default_scalar(self) -> str: + return "" + + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + return data # type: ignore[return-value] + + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + """ + Strings pass through + """ + if not check_json_str(data): + raise TypeError(f"Invalid type: {data}. Expected a string.") + return data + + def check_scalar(self, data: object) -> bool: + return isinstance(data, str) + + def _cast_scalar_unchecked(self, data: object) -> str: + return str(data) diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py index a5b77d9931..83a917dee8 100644 --- a/tests/test_regression/test_regression.py +++ b/tests/test_regression/test_regression.py @@ -7,7 +7,7 @@ import numcodecs import numpy as np import pytest -from numcodecs import LZ4, LZMA, Blosc, GZip, VLenUTF8, Zstd +from numcodecs import LZ4, LZMA, Blosc, GZip, VLenBytes, VLenUTF8, Zstd import zarr from zarr.core.array import Array @@ -67,7 +67,21 @@ class ArrayParams: compressor=GZip(), ) ] -array_cases = basic_array_cases + datetime_array_cases + string_array_cases + vlen_string_cases +vlen_bytes_cases = [ + ArrayParams( + values=np.array([b"a", b"bb", b"ccc", b"dddd"], dtype="O"), + fill_value=b"1", + filters=(VLenBytes(),), + compressor=GZip(), + ) +] +array_cases = ( + basic_array_cases + + datetime_array_cases + + string_array_cases + + vlen_string_cases + + vlen_bytes_cases +) 
@pytest.fixture From d7da3d9136f44d65341c11d2f9026bbbce72e6e8 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 29 May 2025 22:03:00 +0200 Subject: [PATCH 119/130] add vlen-bytes --- src/zarr/core/dtype/__init__.py | 3 ++ src/zarr/core/dtype/npy/vlen_bytes.py | 36 ++++++++++++------------ src/zarr/core/dtype/wrapper.py | 6 ++-- tests/test_regression/test_regression.py | 7 +++-- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index a8bfe2b5c4..575086cb6f 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -12,6 +12,7 @@ Structured, ) from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 +from zarr.core.dtype.npy.vlen_bytes import VariableLengthBytes if TYPE_CHECKING: from zarr.core.common import ZarrFormat @@ -88,6 +89,7 @@ | FixedLengthBytes | Structured | TimeDType + | VariableLengthBytes ) # mypy has trouble inferring the type of variablelengthstring dtype, because its class definition # depends on the installed numpy version. That's why the type: ignore statement is needed here. 
@@ -100,6 +102,7 @@ FixedLengthBytes, Structured, *TIME_DTYPE, + VariableLengthBytes, ) # This type models inputs that can be coerced to a ZDType diff --git a/src/zarr/core/dtype/npy/vlen_bytes.py b/src/zarr/core/dtype/npy/vlen_bytes.py index 6d804cac60..c25523f9ed 100644 --- a/src/zarr/core/dtype/npy/vlen_bytes.py +++ b/src/zarr/core/dtype/npy/vlen_bytes.py @@ -1,15 +1,17 @@ +import base64 from dataclasses import dataclass from typing import ClassVar, Literal, Self, TypeGuard, overload import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import HasObjectCodec -from zarr.core.dtype.wrapper import TBaseDType, ZDType +from zarr.core.dtype.common import HasObjectCodec, v3_unstable_dtype_warning +from zarr.core.dtype.npy.common import check_json_str +from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType @dataclass(frozen=True, kw_only=True) -class VariableLengthString(ZDType[np.dtypes.ObjectDType, str], HasObjectCodec): # type: ignore[no-redef] +class VariableLengthBytes(ZDType[np.dtypes.ObjectDType, bytes], HasObjectCodec): dtype_cls = np.dtypes.ObjectDType _zarr_v3_name: ClassVar[Literal["variable_length_bytes"]] = "variable_length_bytes" object_codec_id = "vlen-bytes" @@ -39,12 +41,13 @@ def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]] def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... @overload - def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_utf8"]: ... + def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_bytes"]: ... 
- def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf8"]: + def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_bytes"]: if zarr_format == 2: return "|O" elif zarr_format == 3: + v3_unstable_dtype_warning(self) return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -54,22 +57,19 @@ def _from_json_unchecked( ) -> Self: return cls() - def default_scalar(self) -> str: - return "" + def default_scalar(self) -> bytes: + return b"" def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - return data # type: ignore[return-value] + return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: - """ - Strings pass through - """ - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. Expected a string.") - return data + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> bytes: + if check_json_str(data): + return base64.standard_b64decode(data.encode("ascii")) + raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover def check_scalar(self, data: object) -> bool: - return isinstance(data, str) + return isinstance(data, bytes | str) - def _cast_scalar_unchecked(self, data: object) -> str: - return str(data) + def _cast_scalar_unchecked(self, data: object) -> bytes: + return bytes(data) # type: ignore[no-any-return, call-overload] diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index f3d6b0adca..4c399bbb84 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -45,7 +45,7 @@ # This the upper bound for the scalar types we support. 
It's numpy scalars + str, # because the new variable-length string dtype in numpy does not have a corresponding scalar type -TBaseScalar = np.generic | str +TBaseScalar = np.generic | str | bytes # This is the bound for the dtypes that we support. If we support non-numpy dtypes, # then this bound will need to be widened. TBaseDType = np.dtype[np.generic] @@ -174,8 +174,8 @@ def cast_scalar(self, data: object) -> TScalar_co: if self.check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( - f"The value {data} failed a type check. " - f"It cannot be safely cast to a scalar compatible with {self.dtype_cls}. " + f"The value {data!r} failed a type check. " + f"It cannot be safely cast to a scalar compatible with {self}. " f"Consult the documentation for {self} to determine the possible values that can " "be cast to scalars of the wrapped data type." ) diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py index 83a917dee8..a1d13510c3 100644 --- a/tests/test_regression/test_regression.py +++ b/tests/test_regression/test_regression.py @@ -13,6 +13,7 @@ from zarr.core.array import Array from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding from zarr.core.dtype.npy.string import VariableLengthString +from zarr.core.dtype.npy.vlen_bytes import VariableLengthBytes from zarr.storage import LocalStore if TYPE_CHECKING: @@ -33,7 +34,7 @@ def runner_installed() -> bool: @dataclass(kw_only=True) class ArrayParams: values: np.ndarray[tuple[int], np.dtype[np.generic]] - fill_value: np.generic | str | int + fill_value: np.generic | str | int | bytes filters: tuple[numcodecs.abc.Codec, ...] 
= () compressor: numcodecs.abc.Codec @@ -92,8 +93,10 @@ def source_array(tmp_path: Path, request: pytest.FixtureRequest) -> Array: compressor = array_params.compressor chunk_key_encoding = V2ChunkKeyEncoding(separator="/") dtype: ZDTypeLike - if array_params.values.dtype == np.dtype("|O"): + if array_params.values.dtype == np.dtype("|O") and array_params.filters == (VLenUTF8(),): dtype = VariableLengthString() # type: ignore[assignment] + elif array_params.values.dtype == np.dtype("|O") and array_params.filters == (VLenBytes(),): + dtype = VariableLengthBytes() else: dtype = array_params.values.dtype z = zarr.create_array( From 1f767e45450ab758ffbe34bbead1a508ae82f9b5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 6 Jun 2025 15:20:01 +0300 Subject: [PATCH 120/130] replace placeholder text with links to a github issue --- src/zarr/core/dtype/registry.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/dtype/registry.py b/src/zarr/core/dtype/registry.py index 0423f69dbe..308bde602c 100644 --- a/src/zarr/core/dtype/registry.py +++ b/src/zarr/core/dtype/registry.py @@ -56,7 +56,7 @@ def match_dtype(self, dtype: TBaseDType) -> ZDType[TBaseDType, TBaseScalar]: "data type. " "In this case you should construct your array by providing a specific Zarr data " 'type. For a list of Zarr data types that are compatible with the numpy "Object"' - "data type, see xxxxxxxxxxx" + "data type, see https://github.com/zarr-developers/zarr-python/issues/3117" ) raise ValueError(msg) matched: list[ZDType[TBaseDType, TBaseScalar]] = [] @@ -71,7 +71,7 @@ def match_dtype(self, dtype: TBaseDType) -> ZDType[TBaseDType, TBaseScalar]: f"Multiple data type wrappers found that match dtype '{dtype}': {matched}. " "You should unregister one of these data types, or avoid Zarr data type inference " "entirely by providing a specific Zarr data type when creating your array." 
- "For more information, see xxxxxxxxxxxxxxxxxx" + "For more information, see https://github.com/zarr-developers/zarr-python/issues/3117" ) raise ValueError(msg) raise ValueError(f"No data type wrapper found that matches dtype '{dtype}'") From cf5504194723d25cdbd38db5c2d6ed5b4c0022dc Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 6 Jun 2025 19:25:30 +0300 Subject: [PATCH 121/130] refactor fixed-length bytes dtypes --- src/zarr/core/config.py | 4 +- src/zarr/core/dtype/__init__.py | 25 +- src/zarr/core/dtype/npy/bool.py | 8 +- src/zarr/core/dtype/npy/bytes.py | 283 ++++++++++++++++++ src/zarr/core/dtype/npy/complex.py | 8 +- src/zarr/core/dtype/npy/float.py | 8 +- src/zarr/core/dtype/npy/int.py | 24 +- src/zarr/core/dtype/npy/string.py | 153 +++------- .../dtype/npy/{sized.py => structured.py} | 132 +------- src/zarr/core/dtype/npy/time.py | 12 +- src/zarr/core/dtype/npy/vlen_bytes.py | 75 ----- src/zarr/core/dtype/wrapper.py | 40 +-- src/zarr/core/metadata/v3.py | 4 +- tests/test_array.py | 12 +- tests/test_codecs/test_vlen.py | 1 + tests/test_config.py | 4 +- tests/test_dtype/conftest.py | 2 +- tests/test_dtype/test_npy/test_bytes.py | 138 +++++++++ tests/test_dtype/test_npy/test_string.py | 78 +---- .../{test_sized.py => test_structured.py} | 47 --- tests/test_dtype/test_wrapper.py | 4 +- tests/test_properties.py | 10 + tests/test_regression/test_regression.py | 20 +- tests/test_v2.py | 7 +- 24 files changed, 599 insertions(+), 500 deletions(-) create mode 100644 src/zarr/core/dtype/npy/bytes.py rename src/zarr/core/dtype/npy/{sized.py => structured.py} (56%) delete mode 100644 src/zarr/core/dtype/npy/vlen_bytes.py create mode 100644 tests/test_dtype/test_npy/test_bytes.py rename tests/test_dtype/test_npy/{test_sized.py => test_structured.py} (71%) diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py index 97df060cb8..74e9bdd8dd 100644 --- a/src/zarr/core/config.py +++ b/src/zarr/core/config.py @@ -149,8 +149,8 @@ def 
categorize_data_type(dtype: ZDType[Any, Any]) -> DTypeCategory: This is used by the config system to determine how to encode arrays with the associated data type when the user has not specified a particular serialization scheme. """ - from zarr.core.dtype import VariableLengthString + from zarr.core.dtype import VariableLengthUTF8 - if isinstance(dtype, VariableLengthString): + if isinstance(dtype, VariableLengthUTF8): return "variable-length-string" return "default" diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 575086cb6f..25e5163e43 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -4,15 +4,14 @@ from zarr.core.dtype.common import DataTypeValidationError from zarr.core.dtype.npy.bool import Bool +from zarr.core.dtype.npy.bytes import NullTerminatedBytes, RawBytes, VariableLengthBytes from zarr.core.dtype.npy.complex import Complex64, Complex128 from zarr.core.dtype.npy.float import Float16, Float32, Float64 from zarr.core.dtype.npy.int import Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 -from zarr.core.dtype.npy.sized import ( - FixedLengthBytes, +from zarr.core.dtype.npy.structured import ( Structured, ) from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 -from zarr.core.dtype.npy.vlen_bytes import VariableLengthBytes if TYPE_CHECKING: from zarr.core.common import ZarrFormat @@ -24,9 +23,8 @@ from zarr.core.common import JSON from zarr.core.dtype.npy.string import ( - FixedLengthASCII, FixedLengthUTF32, - VariableLengthString, + VariableLengthUTF8, ) from zarr.core.dtype.registry import DataTypeRegistry from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @@ -38,8 +36,6 @@ "DataTypeRegistry", "DataTypeValidationError", "DateTime64", - "FixedLengthASCII", - "FixedLengthBytes", "FixedLengthUTF32", "Float16", "Float32", @@ -48,6 +44,8 @@ "Int16", "Int32", "Int64", + "NullTerminatedBytes", + "RawBytes", "Structured", "TBaseDType", "TBaseScalar", @@ -57,7 
+55,7 @@ "UInt16", "UInt32", "UInt64", - "VariableLengthString", + "VariableLengthUTF8", "ZDType", "data_type_registry", "parse_data_type", @@ -74,19 +72,22 @@ ComplexFloatDType = Complex64 | Complex128 COMPLEX_FLOAT_DTYPE: Final = Complex64, Complex128 -StringDType = FixedLengthUTF32 | VariableLengthString | FixedLengthASCII -STRING_DTYPE: Final = FixedLengthUTF32, VariableLengthString, FixedLengthASCII +StringDType = FixedLengthUTF32 | VariableLengthUTF8 +STRING_DTYPE: Final = FixedLengthUTF32, VariableLengthUTF8 TimeDType = DateTime64 | TimeDelta64 TIME_DTYPE: Final = DateTime64, TimeDelta64 +BytesDType = RawBytes | NullTerminatedBytes | VariableLengthBytes +BYTES_DTYPE: Final = RawBytes, NullTerminatedBytes, VariableLengthBytes + AnyDType = ( Bool | IntegerDType | FloatDType | ComplexFloatDType | StringDType - | FixedLengthBytes + | BytesDType | Structured | TimeDType | VariableLengthBytes @@ -99,7 +100,7 @@ *FLOAT_DTYPE, *COMPLEX_FLOAT_DTYPE, *STRING_DTYPE, - FixedLengthBytes, + *BYTES_DTYPE, Structured, *TIME_DTYPE, VariableLengthBytes, diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index b1800127e8..bee42b6a13 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -27,14 +27,14 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): dtype_cls = np.dtypes.BoolDType @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: return cls() def to_native_dtype(self: Self) -> np.dtypes.BoolDType: return self.dtype_cls() @classmethod - def check_json_v2( + def _check_json_v2( cls, data: JSON, *, object_codec_id: str | None = None ) -> TypeGuard[Literal["|b1"]]: """ @@ -43,7 +43,7 @@ def check_json_v2( return data in cls._zarr_v2_names @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["bool"]]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["bool"]]: return data == 
cls._zarr_v3_name @overload @@ -114,7 +114,7 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: return self._cast_scalar_unchecked(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") # pragma: no cover - def check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> bool: # Anything can become a bool return True diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py new file mode 100644 index 0000000000..9d815ab849 --- /dev/null +++ b/src/zarr/core/dtype/npy/bytes.py @@ -0,0 +1,283 @@ +import base64 +import re +from dataclasses import dataclass +from typing import Any, ClassVar, Literal, Self, TypedDict, TypeGuard, cast, overload + +import numpy as np + +from zarr.core.common import JSON, NamedConfig, ZarrFormat +from zarr.core.dtype.common import HasItemSize, HasLength, HasObjectCodec, v3_unstable_dtype_warning +from zarr.core.dtype.npy.common import check_json_str +from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType + + +class FixedLengthBytesConfig(TypedDict): + length_bytes: int + + +NTBytesJSONV3 = NamedConfig[Literal["null_terminated_bytes"], FixedLengthBytesConfig] +RawBytesJSONV3 = NamedConfig[Literal["raw_bytes"], FixedLengthBytesConfig] + + +@dataclass(frozen=True, kw_only=True) +class NullTerminatedBytes(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): + dtype_cls = np.dtypes.BytesDType + _zarr_v3_name: ClassVar[Literal["null_terminated_bytes"]] = "null_terminated_bytes" + + @classmethod + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: + return cls(length=dtype.itemsize) + + def to_native_dtype(self) -> np.dtypes.BytesDType[int]: + return self.dtype_cls(self.length) + + @classmethod + def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + """ + Check that the input is a valid JSON representation of a numpy S dtype. 
+ """ + # match |S1, |S2, etc + return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None + + @classmethod + def _check_json_v3(cls, data: JSON) -> TypeGuard[NTBytesJSONV3]: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and isinstance(data["configuration"], dict) + and "length_bytes" in data["configuration"] + ) + + @overload + def to_json(self, zarr_format: Literal[2]) -> str: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> NTBytesJSONV3: ... + + def to_json(self, zarr_format: ZarrFormat) -> str | NTBytesJSONV3: + if zarr_format == 2: + return self.to_native_dtype().str + elif zarr_format == 3: + v3_unstable_dtype_warning(self) + return { + "name": self._zarr_v3_name, + "configuration": {"length_bytes": self.length}, + } + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: + if zarr_format == 2: + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=data["configuration"]["length_bytes"]) # type: ignore[index, call-overload] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + def default_scalar(self) -> np.bytes_: + return np.bytes_(b"") + + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + as_bytes = self.cast_scalar(data) + return base64.standard_b64encode(as_bytes).decode("ascii") + + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: + if check_json_str(data): + return self.to_native_dtype().type(base64.standard_b64decode(data.encode("ascii"))) + raise TypeError( + f"Invalid type: {data}. Expected a base64-encoded string." 
+ ) # pragma: no cover + + def _check_scalar(self, data: object) -> bool: + # this is generous for backwards compatibility + return isinstance(data, np.bytes_ | str | bytes | int) + + def _cast_scalar_unchecked(self, data: object) -> np.bytes_: + # We explicitly truncate the result because of the following numpy behavior: + # >>> x = np.dtype('S3').type('hello world') + # >>> x + # np.bytes_(b'hello world') + # >>> x.dtype + # dtype('S11') + + if isinstance(data, int): + return self.to_native_dtype().type(str(data)[: self.length]) + else: + return self.to_native_dtype().type(data[: self.length]) # type: ignore[index] + + @property + def item_size(self) -> int: + return self.length + + +@dataclass(frozen=True, kw_only=True) +class RawBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize): + # np.dtypes.VoidDType is specified in an odd way in numpy + # it cannot be used to create instances of the dtype + # so we have to tell mypy to ignore this here + dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] + _zarr_v3_name: ClassVar[Literal["raw_bytes"]] = "raw_bytes" + + @classmethod + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: + return cls(length=dtype.itemsize) + + def to_native_dtype(self) -> np.dtypes.VoidDType[int]: + # Numpy does not allow creating a void type + # by invoking np.dtypes.VoidDType directly + return cast("np.dtypes.VoidDType[int]", np.dtype(f"V{self.length}")) + + @classmethod + def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + # Check that the dtype is |V1, |V2, ... 
+ return isinstance(data, str) and re.match(r"^\|V\d+$", data) is not None + + @classmethod + def _check_json_v3(cls, data: JSON) -> TypeGuard[RawBytesJSONV3]: + return ( + isinstance(data, dict) + and set(data.keys()) == {"name", "configuration"} + and data["name"] == cls._zarr_v3_name + and isinstance(data["configuration"], dict) + and set(data["configuration"].keys()) == {"length_bytes"} + ) + + @overload + def to_json(self, zarr_format: Literal[2]) -> str: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> RawBytesJSONV3: ... + + def to_json(self, zarr_format: ZarrFormat) -> str | RawBytesJSONV3: + if zarr_format == 2: + return self.to_native_dtype().str + elif zarr_format == 3: + return {"name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}} + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: + if zarr_format == 2: + return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] + elif zarr_format == 3: + return cls(length=data["configuration"]["length_bytes"]) # type: ignore[index, call-overload] + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _check_native_dtype( + cls: type[Self], dtype: TBaseDType + ) -> TypeGuard[np.dtypes.VoidDType[Any]]: + """ + Numpy void dtype comes in two forms: + * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. + * If the ``fields`` attribute is not ``None``, then the dtype represents a structured dtype, + + In this check we ensure that ``fields`` is ``None``. + + Parameters + ---------- + dtype : TDType + The dtype to check. + + Returns + ------- + Bool + True if the dtype matches, False otherwise. 
+        """
+        return cls.dtype_cls is type(dtype) and dtype.fields is None  # type: ignore[has-type]
+
+    def default_scalar(self) -> np.void:
+        return self.to_native_dtype().type(("\x00" * self.length).encode("ascii"))
+
+    def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str:
+        return base64.standard_b64encode(self.cast_scalar(data).tobytes()).decode("ascii")
+
+    def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void:
+        if check_json_str(data):
+            return self.to_native_dtype().type(base64.standard_b64decode(data))
+        raise TypeError(f"Invalid type: {data}. Expected a string.")  # pragma: no cover
+
+    def _check_scalar(self, data: object) -> bool:
+        return isinstance(data, np.bytes_ | str | bytes | np.void)
+
+    def _cast_scalar_unchecked(self, data: object) -> np.void:
+        native_dtype = self.to_native_dtype()
+        # Without the second argument, numpy will return a void scalar for dtype V1.
+        # The second argument ensures that, if native_dtype is something like V10,
+        # the result will actually be a V10 scalar.
+        return native_dtype.type(data, native_dtype)
+
+    @property
+    def item_size(self) -> int:
+        return self.length
+
+
+@dataclass(frozen=True, kw_only=True)
+class VariableLengthBytes(ZDType[np.dtypes.ObjectDType, bytes], HasObjectCodec):
+    dtype_cls = np.dtypes.ObjectDType
+    _zarr_v3_name: ClassVar[Literal["variable_length_bytes"]] = "variable_length_bytes"
+    object_codec_id = "vlen-bytes"
+
+    @classmethod
+    def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self:
+        return cls()
+
+    def to_native_dtype(self) -> np.dtypes.ObjectDType:
+        return self.dtype_cls()
+
+    @classmethod
+    def _check_json_v2(
+        cls, data: JSON, *, object_codec_id: str | None = None
+    ) -> TypeGuard[Literal["|O"]]:
+        """
+        Check that the input is a valid JSON representation of a numpy O dtype, and that the
+        object codec id is appropriate for variable-length bytes.
+ """ + return data == "|O" and object_codec_id == cls.object_codec_id + + @classmethod + def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_bytes"]]: + return data == cls._zarr_v3_name + + @overload + def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_bytes"]: ... + + def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_bytes"]: + if zarr_format == 2: + return "|O" + elif zarr_format == 3: + v3_unstable_dtype_warning(self) + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + + @classmethod + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: + return cls() + + def default_scalar(self) -> bytes: + return b"" + + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] + + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> bytes: + if check_json_str(data): + return base64.standard_b64decode(data.encode("ascii")) + raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover + + def _check_scalar(self, data: object) -> bool: + return isinstance(data, bytes | str) + + def _cast_scalar_unchecked(self, data: object) -> bytes: + if isinstance(data, str): + return bytes(data, encoding="utf-8") + return bytes(data) # type: ignore[no-any-return, call-overload] diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index f7db6fe94d..f68640e4ce 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -37,7 +37,7 @@ class BaseComplex(ZDType[TComplexDType_co, TComplexScalar_co], HasEndianness, Ha _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -76,17 +76,17 @@ def _from_json_unchecked( raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of this data type. 
""" return data in cls._zarr_v2_names @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[str]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[str]: return data == cls._zarr_v3_name - def check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> bool: return isinstance(data, ComplexLike) def _cast_scalar_unchecked(self, data: object) -> TComplexScalar_co: diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index 174b2338ae..f87f032581 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -28,7 +28,7 @@ class BaseFloat(ZDType[TFloatDType_co, TFloatScalar_co], HasEndianness, HasItemS _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -67,17 +67,17 @@ def _from_json_unchecked( raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of this data type. 
""" return data in cls._zarr_v2_names @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[str]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[str]: return data == cls._zarr_v3_name - def check_scalar(self, data: object) -> TypeGuard[FloatLike]: + def _check_scalar(self, data: object) -> TypeGuard[FloatLike]: return isinstance(data, FloatLike) def _cast_scalar_unchecked(self, data: object) -> TFloatScalar_co: diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index 92705917f9..aed577ee44 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -47,17 +47,17 @@ class BaseInt(ZDType[TIntDType_co, TIntScalar_co], HasItemSize): _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod - def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of this data type. 
""" return data in cls._zarr_v2_names @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[str]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[str]: return data == cls._zarr_v3_name - def check_scalar(self, data: object) -> TypeGuard[IntLike]: + def _check_scalar(self, data: object) -> TypeGuard[IntLike]: return isinstance(data, IntLike) def _cast_scalar_unchecked(self, data: object) -> TIntScalar_co: @@ -146,7 +146,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["int8", "|i1"]: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: return cls() def to_native_dtype(self: Self) -> np.dtypes.Int8DType: @@ -196,7 +196,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["uint8", "|u1"]: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: return cls() def to_native_dtype(self: Self) -> np.dtypes.UInt8DType: @@ -246,7 +246,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["int16", ">i2", " Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -303,7 +303,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["uint16", ">u2", " Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -366,14 +366,14 @@ def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: # despite the two classes being different. 
Thus we will create an instance of `cls` with the # latter dtype, after pulling in the byte order of the input if dtype == np.dtypes.Int32DType(): - return cls._from_native_dtype_unsafe( + return cls._from_native_dtype_unchecked( np.dtypes.Int32DType().newbyteorder(dtype.byteorder) ) else: return super().from_native_dtype(dtype) @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -427,7 +427,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["uint32", ">u4", " Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -481,7 +481,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["int64", ">i8", " Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) @@ -537,7 +537,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["uint64", ">u8", " Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls(endianness=endianness_from_numpy_str(byte_order)) diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index 2299b7aab1..853f32806d 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -1,6 +1,5 @@ from __future__ import annotations -import base64 import re from dataclasses import dataclass from typing import TYPE_CHECKING, ClassVar, Literal, Self, TypedDict, TypeGuard, cast, overload @@ -8,7 +7,13 @@ import numpy as np from zarr.core.common import NamedConfig -from zarr.core.dtype.common import HasEndianness, HasItemSize, HasLength, 
HasObjectCodec +from zarr.core.dtype.common import ( + HasEndianness, + HasItemSize, + HasLength, + HasObjectCodec, + v3_unstable_dtype_warning, +) from zarr.core.dtype.npy.common import ( EndiannessNumpy, check_json_str, @@ -29,96 +34,7 @@ class LengthBytesConfig(TypedDict): # TDO: Fix this terrible name -FixedLengthASCIIJSONV3 = NamedConfig[Literal["fixed_length_ascii"], LengthBytesConfig] - - -@dataclass(frozen=True, kw_only=True) -class FixedLengthASCII(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): - dtype_cls = np.dtypes.BytesDType - _zarr_v3_name: ClassVar[Literal["fixed_length_ascii"]] = "fixed_length_ascii" - - @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: - return cls(length=dtype.itemsize) - - def to_native_dtype(self) -> np.dtypes.BytesDType[int]: - return self.dtype_cls(self.length) - - @classmethod - def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: - """ - Check that the input is a valid JSON representation of a numpy S dtype. - """ - # match |S1, |S2, etc - return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None - - @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[FixedLengthASCIIJSONV3]: - return ( - isinstance(data, dict) - and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name - and isinstance(data["configuration"], dict) - and "length_bytes" in data["configuration"] - ) - - @overload - def to_json(self, zarr_format: Literal[2]) -> str: ... - - @overload - def to_json(self, zarr_format: Literal[3]) -> FixedLengthASCIIJSONV3: ... 
- - def to_json(self, zarr_format: ZarrFormat) -> str | FixedLengthASCIIJSONV3: - if zarr_format == 2: - return self.to_native_dtype().str - elif zarr_format == 3: - return { - "name": self._zarr_v3_name, - "configuration": {"length_bytes": self.length}, - } - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"]) # type: ignore[index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def default_scalar(self) -> np.bytes_: - return np.bytes_(b"") - - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] - - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: - if check_json_str(data): - return self.to_native_dtype().type(base64.standard_b64decode(data.encode("ascii"))) - raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover - - def check_scalar(self, data: object) -> bool: - # this is generous for backwards compatibility - return isinstance(data, np.bytes_ | str | bytes | int) - - def _cast_scalar_unchecked(self, data: object) -> np.bytes_: - # We explicitly truncate the result because of the following numpy behavior: - # >>> x = np.dtype('S3').type('hello world') - # >>> x - # np.bytes_(b'hello world') - # >>> x.dtype - # dtype('S11') - - if isinstance(data, int): - return self.to_native_dtype().type(str(data)[: self.length]) - else: - return self.to_native_dtype().type(data[: self.length]) # type: ignore[index] - - @property - def item_size(self) -> int: - return self.length +FixedLengthBytesJSONV3 = NamedConfig[Literal["fixed_length_bytes"], LengthBytesConfig] # TODO: Fix this terrible name @@ -134,7 +50,7 @@ class FixedLengthUTF32( code_point_bytes: ClassVar[int] = 4 # utf32 is 4 bytes per code point @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: byte_order = cast("EndiannessNumpy", dtype.byteorder) return cls( length=dtype.itemsize // (cls.code_point_bytes), @@ -146,14 +62,14 @@ def to_native_dtype(self) -> np.dtypes.StrDType[int]: return self.dtype_cls(self.length).newbyteorder(byte_order) @classmethod - def check_json_v2(cls, data: JSON, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: JSON, object_codec_id: str | None = None) -> TypeGuard[str]: """ Check that the input is a valid JSON representation of a numpy S dtype. 
""" return isinstance(data, str) and re.match(r"^[><]U\d+$", data) is not None @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[FixedLengthUTF32JSONV3]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[FixedLengthUTF32JSONV3]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -174,6 +90,7 @@ def to_json(self, zarr_format: ZarrFormat) -> str | FixedLengthUTF32JSONV3: if zarr_format == 2: return self.to_native_dtype().str elif zarr_format == 3: + v3_unstable_dtype_warning(self) return { "name": self._zarr_v3_name, "configuration": {"length_bytes": self.length * self.code_point_bytes}, @@ -201,7 +118,7 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: return self.to_native_dtype().type(data) raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover - def check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> TypeGuard[str | np.str_ | bytes | int]: # this is generous for backwards compatibility return isinstance(data, str | np.str_ | bytes | int) @@ -223,23 +140,32 @@ def item_size(self) -> int: return self.length * self.code_point_bytes +def check_vlen_string_json_scalar(data: object) -> TypeGuard[int | str | float]: + """ + This function checks the type of JSON-encoded variable length strings. 
It is generous for + backwards compatibility, as zarr-python v2 would use ints for variable length strings + fill values + """ + return isinstance(data, int | str | float) + + if _NUMPY_SUPPORTS_VLEN_STRING: @dataclass(frozen=True, kw_only=True) - class VariableLengthString(ZDType[np.dtypes.StringDType, str], HasObjectCodec): # type: ignore[type-var] + class VariableLengthUTF8(ZDType[np.dtypes.StringDType, str], HasObjectCodec): # type: ignore[type-var] dtype_cls = np.dtypes.StringDType _zarr_v3_name: ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" object_codec_id = "vlen-utf8" @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: return cls() def to_native_dtype(self) -> np.dtypes.StringDType: return self.dtype_cls() @classmethod - def check_json_v2( + def _check_json_v2( cls, data: JSON, *, object_codec_id: str | None = None ) -> TypeGuard[Literal["|O"]]: """ @@ -249,7 +175,7 @@ def check_json_v2( return data == "|O" and object_codec_id == cls.object_codec_id @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: return data == cls._zarr_v3_name @overload @@ -265,6 +191,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf # that practice return "|O" elif zarr_format == 3: + v3_unstable_dtype_warning(self) return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -278,14 +205,16 @@ def default_scalar(self) -> str: return "" def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - return str(data) + if self._check_scalar(data): + return data + raise TypeError(f"Invalid type: {data}. 
Expected a string.") def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. Expected a string.") - return data + if not check_vlen_string_json_scalar(data): + raise TypeError(f"Invalid type: {data}. Expected a string or number.") + return str(data) - def check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> TypeGuard[str]: return isinstance(data, str) def _cast_scalar_unchecked(self, data: object) -> str: @@ -294,20 +223,20 @@ def _cast_scalar_unchecked(self, data: object) -> str: else: # Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead. @dataclass(frozen=True, kw_only=True) - class VariableLengthString(ZDType[np.dtypes.ObjectDType, str], HasObjectCodec): # type: ignore[no-redef] + class VariableLengthUTF8(ZDType[np.dtypes.ObjectDType, str], HasObjectCodec): # type: ignore[no-redef] dtype_cls = np.dtypes.ObjectDType _zarr_v3_name: ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" object_codec_id = "vlen-utf8" @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: return cls() def to_native_dtype(self) -> np.dtypes.ObjectDType: return self.dtype_cls() @classmethod - def check_json_v2( + def _check_json_v2( cls, data: JSON, *, object_codec_id: str | None = None ) -> TypeGuard[Literal["|O"]]: """ @@ -317,7 +246,7 @@ def check_json_v2( return data == "|O" and object_codec_id == cls.object_codec_id @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: return data == cls._zarr_v3_name @overload @@ -343,7 +272,9 @@ def default_scalar(self) -> str: return "" def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - return data # type: ignore[return-value] + 
if self._check_scalar(data): + return data + raise TypeError(f"Invalid type: {data}. Expected a string.") def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: """ @@ -353,7 +284,7 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: raise TypeError(f"Invalid type: {data}. Expected a string.") return data - def check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> TypeGuard[str]: return isinstance(data, str) def _cast_scalar_unchecked(self, data: object) -> str: diff --git a/src/zarr/core/dtype/npy/sized.py b/src/zarr/core/dtype/npy/structured.py similarity index 56% rename from src/zarr/core/dtype/npy/sized.py rename to src/zarr/core/dtype/npy/structured.py index 69d6145ad4..579e0a9e27 100644 --- a/src/zarr/core/dtype/npy/sized.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -1,8 +1,6 @@ -import base64 -import re from collections.abc import Sequence from dataclasses import dataclass -from typing import Any, ClassVar, Literal, Self, TypedDict, TypeGuard, cast, overload +from typing import Literal, Self, TypeGuard, cast, overload import numpy as np @@ -10,7 +8,6 @@ from zarr.core.dtype.common import ( DataTypeValidationError, HasItemSize, - HasLength, v3_unstable_dtype_warning, ) from zarr.core.dtype.npy.common import ( @@ -21,117 +18,6 @@ from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, TBaseScalar, ZDType -class FixedLengthBytesConfig(TypedDict): - length_bytes: int - - -FixedLengthBytesJSONV3 = NamedConfig[Literal["fixed_length_bytes"], FixedLengthBytesConfig] - - -@dataclass(frozen=True, kw_only=True) -class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize): - # np.dtypes.VoidDType is specified in an odd way in numpy - # it cannot be used to create instances of the dtype - # so we have to tell mypy to ignore this here - dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] - _zarr_v3_name: 
ClassVar[Literal["fixed_length_bytes"]] = "fixed_length_bytes" - - @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: - return cls(length=dtype.itemsize) - - def to_native_dtype(self) -> np.dtypes.VoidDType[int]: - # Numpy does not allow creating a void type - # by invoking np.dtypes.VoidDType directly - return cast("np.dtypes.VoidDType[int]", np.dtype(f"V{self.length}")) - - @classmethod - def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: - # Check that the dtype is |V1, |V2, ... - return isinstance(data, str) and re.match(r"^\|V\d+$", data) is not None - - @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[FixedLengthBytesJSONV3]: - return ( - isinstance(data, dict) - and set(data.keys()) == {"name", "configuration"} - and data["name"] == cls._zarr_v3_name - and isinstance(data["configuration"], dict) - and set(data["configuration"].keys()) == {"length_bytes"} - ) - - @overload - def to_json(self, zarr_format: Literal[2]) -> str: ... - - @overload - def to_json(self, zarr_format: Literal[3]) -> FixedLengthBytesJSONV3: ... 
- - def to_json(self, zarr_format: ZarrFormat) -> str | FixedLengthBytesJSONV3: - if zarr_format == 2: - return self.to_native_dtype().str - elif zarr_format == 3: - return {"name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}} - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"]) # type: ignore[index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def check_native_dtype( - cls: type[Self], dtype: TBaseDType - ) -> TypeGuard[np.dtypes.VoidDType[Any]]: - """ - Numpy void dtype comes in two forms: - * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. - * If the ``fields`` attribute is not ``None``, then the dtype represents a structured dtype, - - In this check we ensure that ``fields`` is ``None``. - - Parameters - ---------- - dtype : TDType - The dtype to check. - - Returns - ------- - Bool - True if the dtype matches, False otherwise. - """ - return cls.dtype_cls is type(dtype) and dtype.fields is None # type: ignore[has-type] - - def default_scalar(self) -> np.void: - return self.to_native_dtype().type(("\x00" * self.length).encode("ascii")) - - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(self.cast_scalar(data).tobytes()).decode("ascii") - - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: - if check_json_str(data): - return self.to_native_dtype().type(base64.standard_b64decode(data)) - raise TypeError(f"Invalid type: {data}. 
Expected a string.") # pragma: no cover - - def check_scalar(self, data: object) -> bool: - return isinstance(data, np.bytes_ | str | bytes | np.void) - - def _cast_scalar_unchecked(self, data: object) -> np.void: - native_dtype = self.to_native_dtype() - # Without the second argument, numpy will return a void scalar for dtype V1. - # The second argument ensures that, if native_dtype is something like V10, - # the result will actually be a V10 scalar. - return native_dtype.type(data, native_dtype) - - @property - def item_size(self) -> int: - return self.length - - # TODO: tighten this up, get a v3 spec in place, handle endianness, etc. @dataclass(frozen=True, kw_only=True) class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): @@ -153,7 +39,7 @@ def _cast_scalar_unchecked(self, data: object) -> np.void: return cast("np.void", res) @classmethod - def check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: + def _check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: """ Check that this dtype is a numpy structured dtype @@ -167,10 +53,10 @@ def check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[ TypeGuard[np.dtypes.VoidDType] True if the dtype matches, False otherwise. 
""" - return super().check_native_dtype(dtype) and dtype.fields is not None + return super()._check_native_dtype(dtype) and dtype.fields is not None @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: from zarr.core.dtype import get_data_type_from_native_dtype fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] @@ -206,7 +92,7 @@ def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V3 | DTypeJSON_V2: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def check_json_v2( + def _check_json_v2( cls, data: JSON, *, object_codec_id: str | None = None ) -> TypeGuard[list[object]]: # the actual JSON form is recursive and hard to annotate, so we give up and do @@ -222,7 +108,7 @@ def check_json_v2( ) @classmethod - def check_json_v3( + def _check_json_v3( cls, data: JSON ) -> TypeGuard[NamedConfig[Literal["structured"], dict[str, Sequence[tuple[str, JSON]]]]]: return ( @@ -243,7 +129,7 @@ def _from_json_unchecked( # This is a horrible mess, because this data type is recursive if zarr_format == 2: - if cls.check_json_v2(data): # type: ignore[arg-type] + if cls._check_json_v2(data): # type: ignore[arg-type] # structured dtypes are constructed directly from a list of lists # note that we do not handle the object codec here! this will prevent structured # dtypes from containing object dtypes. 
@@ -256,7 +142,7 @@ def _from_json_unchecked( else: raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") elif zarr_format == 3: - if cls.check_json_v3(data): # type: ignore[arg-type] + if cls._check_json_v3(data): # type: ignore[arg-type] config = data["configuration"] meta_fields = config["fields"] fields = tuple( @@ -278,7 +164,7 @@ def to_native_dtype(self) -> np.dtypes.VoidDType[int]: def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: return bytes_to_json(self.cast_scalar(data).tobytes(), zarr_format) - def check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> bool: # TODO: implement something here! return True diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index 4c5ce45442..9f82d3d168 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -108,7 +108,7 @@ def __post_init__(self) -> None: raise ValueError(f"unit must be one of {get_args(DateTimeUnit)}, got {self.unit!r}.") @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: unit, scale_factor = np.datetime_data(dtype.name) unit = cast("DateTimeUnit", unit) byteorder = cast("EndiannessNumpy", dtype.byteorder) @@ -156,7 +156,7 @@ def to_json(self, zarr_format: ZarrFormat) -> str | DateTime64JSONV3 | TimeDelta def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> int: return datetimelike_to_int(data) # type: ignore[arg-type] - def check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> bool: # TODO: decide which values we should accept for datetimes. 
try: np.array([data], dtype=self.to_native_dtype()) @@ -197,7 +197,7 @@ def _cast_scalar_unchecked(self, data: object) -> np.timedelta64: return self.to_native_dtype().type(data) # type: ignore[arg-type] @classmethod - def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: # match m[M], etc # consider making this a standalone function if not isinstance(data, str): @@ -212,7 +212,7 @@ def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Typ return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -243,7 +243,7 @@ def _cast_scalar_unchecked(self, data: object) -> np.datetime64: return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[no-any-return, call-overload] @classmethod - def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: # match M[M], etc # consider making this a standalone function if not isinstance(data, str): @@ -258,7 +258,7 @@ def check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Typ return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} diff --git a/src/zarr/core/dtype/npy/vlen_bytes.py b/src/zarr/core/dtype/npy/vlen_bytes.py deleted file mode 100644 index c25523f9ed..0000000000 --- 
a/src/zarr/core/dtype/npy/vlen_bytes.py +++ /dev/null @@ -1,75 +0,0 @@ -import base64 -from dataclasses import dataclass -from typing import ClassVar, Literal, Self, TypeGuard, overload - -import numpy as np - -from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import HasObjectCodec, v3_unstable_dtype_warning -from zarr.core.dtype.npy.common import check_json_str -from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType - - -@dataclass(frozen=True, kw_only=True) -class VariableLengthBytes(ZDType[np.dtypes.ObjectDType, bytes], HasObjectCodec): - dtype_cls = np.dtypes.ObjectDType - _zarr_v3_name: ClassVar[Literal["variable_length_bytes"]] = "variable_length_bytes" - object_codec_id = "vlen-bytes" - - @classmethod - def _from_native_dtype_unsafe(cls, dtype: TBaseDType) -> Self: - return cls() - - def to_native_dtype(self) -> np.dtypes.ObjectDType: - return self.dtype_cls() - - @classmethod - def check_json_v2( - cls, data: JSON, *, object_codec_id: str | None = None - ) -> TypeGuard[Literal["|O"]]: - """ - Check that the input is a valid JSON representation of a numpy O dtype, and that the - object codec id is appropriate for variable-length UTF-8 strings. - """ - return data == "|O" and object_codec_id == cls.object_codec_id - - @classmethod - def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: - return data == cls._zarr_v3_name - - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... - - @overload - def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_bytes"]: ... 
- - def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_bytes"]: - if zarr_format == 2: - return "|O" - elif zarr_format == 3: - v3_unstable_dtype_warning(self) - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - return cls() - - def default_scalar(self) -> bytes: - return b"" - - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] - - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> bytes: - if check_json_str(data): - return base64.standard_b64decode(data.encode("ascii")) - raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover - - def check_scalar(self, data: object) -> bool: - return isinstance(data, bytes | str) - - def _cast_scalar_unchecked(self, data: object) -> bytes: - return bytes(data) # type: ignore[no-any-return, call-overload] diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 4c399bbb84..b117656c36 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -84,7 +84,7 @@ class ZDType(Generic[TDType_co, TScalar_co], ABC): _zarr_v3_name: ClassVar[str] @classmethod - def check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_co]: + def _check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_co]: """ Check that a data type matches the dtype_cls class attribute. Used as a type guard. @@ -120,15 +120,15 @@ def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: TypeError If the dtype does not match the dtype_cls class attribute. 
""" - if cls.check_native_dtype(dtype): - return cls._from_native_dtype_unsafe(dtype) + if cls._check_native_dtype(dtype): + return cls._from_native_dtype_unchecked(dtype) raise DataTypeValidationError( f"Invalid dtype: {dtype}. Expected an instance of {cls.dtype_cls}." ) @classmethod @abstractmethod - def _from_native_dtype_unsafe(cls: type[Self], dtype: TBaseDType) -> Self: + def _from_native_dtype_unchecked(cls: type[Self], dtype: TBaseDType) -> Self: """ Wrap a native dtype without checking. @@ -158,20 +158,21 @@ def to_native_dtype(self: Self) -> TDType_co: def cast_scalar(self, data: object) -> TScalar_co: """ - Cast a scalar to the wrapped scalar type. The type is first checked for compatibility. If - it's incompatible with the associated scalar type, a ``TypeError`` will be raised. + Cast a python object to the wrapped scalar type. + The type of the provided scalar is first checked for compatibility. + If it's incompatible with the associated scalar type, a ``TypeError`` will be raised. Parameters ---------- - data : TScalar - The scalar value to cast. + data : object + The python object to cast. Returns ------- TScalar The cast value. """ - if self.check_scalar(data): + if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = ( f"The value {data!r} failed a type check. " @@ -182,9 +183,9 @@ def cast_scalar(self, data: object) -> TScalar_co: raise TypeError(msg) @abstractmethod - def check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> bool: """ - Check that a scalar is a valid value for the wrapped data type. + Check that an python object is a valid scalar value for the wrapped data type. Parameters ---------- @@ -194,19 +195,20 @@ def check_scalar(self, data: object) -> bool: Returns ------- Bool - True if the value is valid, False otherwise. + True if the object is valid, False otherwise. """ ... 
@abstractmethod def _cast_scalar_unchecked(self, data: object) -> TScalar_co: """ - Cast a scalar to the wrapped data type. This method should not perform any input validation. + Cast a python object to the wrapped data type. + This method should not perform any type checking. Parameters ---------- - data : TScalar - The scalar value to cast. + data : object + The python object to cast. Returns ------- @@ -232,7 +234,7 @@ def default_scalar(self) -> TScalar_co: @classmethod @abstractmethod - def check_json_v2( + def _check_json_v2( cls: type[Self], data: JSON, *, object_codec_id: str | None = None ) -> TypeGuard[DTypeJSON_V2]: """ @@ -260,7 +262,7 @@ def check_json_v2( @classmethod @abstractmethod - def check_json_v3(cls: type[Self], data: JSON) -> TypeGuard[DTypeJSON_V3]: + def _check_json_v3(cls: type[Self], data: JSON) -> TypeGuard[DTypeJSON_V3]: """ Check that a JSON representation of a data type matches the dtype_cls class attribute. Used as a type guard. This base implementation checks that the input is a dictionary, @@ -317,7 +319,7 @@ def from_json_v3(cls: type[Self], data: JSON) -> Self: Self The wrapped data type. """ - if cls.check_json_v3(data): + if cls._check_json_v3(data): return cls._from_json_unchecked(data, zarr_format=3) raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}: {data}") @@ -336,7 +338,7 @@ def from_json_v2(cls: type[Self], data: JSON, *, object_codec_id: str | None) -> Self The wrapped data type. 
""" - if cls.check_json_v2(data, object_codec_id=object_codec_id): + if cls._check_json_v2(data, object_codec_id=object_codec_id): return cls._from_json_unchecked(data, zarr_format=2) raise DataTypeValidationError( f"Invalid JSON representation of data type {cls}: {data!r}, object_codec_id={object_codec_id!r}" diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 83b9bd7bc8..bd02a67084 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -5,7 +5,7 @@ from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype from zarr.core.dtype import ( - VariableLengthString, + VariableLengthUTF8, ZDType, get_data_type_from_json_v3, ) @@ -97,7 +97,7 @@ def validate_codecs(codecs: tuple[Codec, ...], dtype: ZDType[TBaseDType, TBaseSc # TODO: use codec ID instead of class name codec_class_name = abc.__class__.__name__ # TODO: Fix typing here - if isinstance(dtype, VariableLengthString) and not codec_class_name == "VLenUTF8Codec": # type: ignore[unreachable] + if isinstance(dtype, VariableLengthUTF8) and not codec_class_name == "VLenUTF8Codec": # type: ignore[unreachable] raise ValueError( f"For string dtype, ArrayBytesCodec must be `VLenUTF8Codec`, got `{codec_class_name}`." 
) diff --git a/tests/test_array.py b/tests/test_array.py index ee0a506538..e300b70f8d 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -46,10 +46,10 @@ from zarr.core.dtype.npy.common import endianness_from_numpy_str from zarr.core.dtype.npy.float import Float32, Float64 from zarr.core.dtype.npy.int import Int16, UInt8 -from zarr.core.dtype.npy.sized import ( +from zarr.core.dtype.npy.string import VariableLengthUTF8 +from zarr.core.dtype.npy.structured import ( Structured, ) -from zarr.core.dtype.npy.string import VariableLengthString from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 from zarr.core.dtype.wrapper import ZDType from zarr.core.group import AsyncGroup @@ -1036,7 +1036,7 @@ def test_dtype_forms(dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFor # Structured dtypes do not have a numpy string representation that uniquely identifies them if not isinstance(dtype, Structured): - if isinstance(dtype, VariableLengthString): + if isinstance(dtype, VariableLengthUTF8): # in numpy 2.3, StringDType().str becomes the string 'StringDType()' which numpy # does not accept as a string representation of the dtype. 
c = zarr.create_array( @@ -1073,6 +1073,7 @@ def test_dtype_roundtrip( assert a.dtype == b.dtype @staticmethod + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("dtype", ["uint8", "float32", "U3", "S4", "V1"]) @pytest.mark.parametrize( "compressors", @@ -1298,9 +1299,9 @@ async def test_v2_chunk_encoding( assert arr.filters == filters_expected @staticmethod - @pytest.mark.parametrize("dtype", [UInt8(), Float32(), VariableLengthString()]) + @pytest.mark.parametrize("dtype", [UInt8(), Float32(), VariableLengthUTF8()]) async def test_default_filters_compressors( - store: MemoryStore, dtype: UInt8 | Float32 | VariableLengthString, zarr_format: ZarrFormat + store: MemoryStore, dtype: UInt8 | Float32 | VariableLengthUTF8, zarr_format: ZarrFormat ) -> None: """ Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with ``filters`` and ``compressors`` unspecified. @@ -1519,6 +1520,7 @@ def test_default_endianness( @pytest.mark.parametrize("value", [1, 1.4, "a", b"a", np.array(1)]) @pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_scalar_array(value: Any, zarr_format: ZarrFormat) -> None: arr = zarr.array(value, zarr_format=zarr_format) assert arr[...] 
== value diff --git a/tests/test_codecs/test_vlen.py b/tests/test_codecs/test_vlen.py index 9024efa7ed..6fe1863464 100644 --- a/tests/test_codecs/test_vlen.py +++ b/tests/test_codecs/test_vlen.py @@ -22,6 +22,7 @@ expected_array_string_dtype = np.dtype("O") +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("store", ["memory", "local"], indirect=["store"]) @pytest.mark.parametrize("dtype", numpy_str_dtypes) @pytest.mark.parametrize("as_object_array", [False, True]) diff --git a/tests/test_config.py b/tests/test_config.py index 58f88ec806..f02bb153e4 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -25,7 +25,7 @@ from zarr.core.buffer.core import Buffer from zarr.core.codec_pipeline import BatchedCodecPipeline from zarr.core.config import BadConfigError, config -from zarr.core.dtype import Int8, VariableLengthString +from zarr.core.dtype import Int8, VariableLengthUTF8 from zarr.core.indexing import SelectorTuple from zarr.registry import ( fully_qualified_name, @@ -312,7 +312,7 @@ async def test_default_codecs(dtype_category: str) -> None: """ zdtype: ZDType[Any, Any] if dtype_category == "variable-length-string": - zdtype = VariableLengthString() + zdtype = VariableLengthUTF8() else: zdtype = Int8() expected_compressors = (GzipCodec(),) diff --git a/tests/test_dtype/conftest.py b/tests/test_dtype/conftest.py index b2aa89afd7..0be1c60088 100644 --- a/tests/test_dtype/conftest.py +++ b/tests/test_dtype/conftest.py @@ -6,7 +6,7 @@ from zarr.core.dtype import data_type_registry from zarr.core.dtype.common import HasLength -from zarr.core.dtype.npy.sized import Structured +from zarr.core.dtype.npy.structured import Structured from zarr.core.dtype.npy.time import DateTime64, TimeDelta64 from zarr.core.dtype.wrapper import ZDType diff --git a/tests/test_dtype/test_npy/test_bytes.py b/tests/test_dtype/test_npy/test_bytes.py new file mode 100644 index 0000000000..fcb43e551b --- /dev/null +++ 
b/tests/test_dtype/test_npy/test_bytes.py @@ -0,0 +1,138 @@ +import numpy as np + +from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams +from zarr.core.dtype.npy.bytes import NullTerminatedBytes, RawBytes, VariableLengthBytes + + +class TestNullTerminatedBytes(BaseTestZDType): + test_cls = NullTerminatedBytes + valid_dtype = (np.dtype("|S10"), np.dtype("|S4")) + invalid_dtype = ( + np.dtype(np.int8), + np.dtype(np.float64), + np.dtype("|U10"), + ) + valid_json_v2 = ( + V2JsonTestParams(dtype="|S0"), + V2JsonTestParams(dtype="|S2"), + V2JsonTestParams(dtype="|S4"), + ) + valid_json_v3 = ({"name": "null_terminated_bytes", "configuration": {"length_bytes": 10}},) + invalid_json_v2 = ( + "|S", + "|U10", + "|f8", + ) + invalid_json_v3 = ( + {"name": "fixed_length_ascii", "configuration": {"length_bits": 0}}, + {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": "invalid"}}, + ) + + scalar_v2_params = ( + (NullTerminatedBytes(length=0), ""), + (NullTerminatedBytes(length=2), "YWI="), + (NullTerminatedBytes(length=4), "YWJjZA=="), + ) + scalar_v3_params = ( + (NullTerminatedBytes(length=0), ""), + (NullTerminatedBytes(length=2), "YWI="), + (NullTerminatedBytes(length=4), "YWJjZA=="), + ) + cast_value_params = ( + (NullTerminatedBytes(length=0), "", np.bytes_("")), + (NullTerminatedBytes(length=2), "ab", np.bytes_("ab")), + (NullTerminatedBytes(length=4), "abcdefg", np.bytes_("abcd")), + ) + item_size_params = ( + NullTerminatedBytes(length=0), + NullTerminatedBytes(length=4), + NullTerminatedBytes(length=10), + ) + + +class TestRawBytes(BaseTestZDType): + test_cls = RawBytes + valid_dtype = (np.dtype("|V10"),) + invalid_dtype = ( + np.dtype(np.int8), + np.dtype(np.float64), + np.dtype("|S10"), + ) + valid_json_v2 = (V2JsonTestParams(dtype="|V10"),) + valid_json_v3 = ( + {"name": "raw_bytes", "configuration": {"length_bytes": 0}}, + {"name": "raw_bytes", "configuration": {"length_bytes": 8}}, + ) + + invalid_json_v2 = ( + "|V", + 
"|S10", + "|f8", + ) + invalid_json_v3 = ( + {"name": "r10"}, + {"name": "r-80"}, + ) + + scalar_v2_params = ( + (RawBytes(length=0), ""), + (RawBytes(length=2), "YWI="), + (RawBytes(length=4), "YWJjZA=="), + ) + scalar_v3_params = ( + (RawBytes(length=0), ""), + (RawBytes(length=2), "YWI="), + (RawBytes(length=4), "YWJjZA=="), + ) + cast_value_params = ( + (RawBytes(length=0), b"", np.void(b"")), + (RawBytes(length=2), b"ab", np.void(b"ab")), + (RawBytes(length=4), b"abcd", np.void(b"abcd")), + ) + item_size_params = ( + RawBytes(length=0), + RawBytes(length=4), + RawBytes(length=10), + ) + + +class TestVariableLengthBytes(BaseTestZDType): + test_cls = VariableLengthBytes + valid_dtype = (np.dtype("|O"),) + invalid_dtype = ( + np.dtype(np.int8), + np.dtype(np.float64), + np.dtype("|U10"), + ) + valid_json_v2 = (V2JsonTestParams(dtype="|O", object_codec_id="vlen-bytes"),) + valid_json_v3 = ("variable_length_bytes",) + invalid_json_v2 = ( + "|S", + "|U10", + "|f8", + ) + invalid_json_v3 = ( + {"name": "fixed_length_ascii", "configuration": {"length_bits": 0}}, + {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": "invalid"}}, + ) + + scalar_v2_params = ( + (VariableLengthBytes(), ""), + (VariableLengthBytes(), "YWI="), + (VariableLengthBytes(), "YWJjZA=="), + ) + scalar_v3_params = ( + (VariableLengthBytes(), ""), + (VariableLengthBytes(), "YWI="), + (VariableLengthBytes(), "YWJjZA=="), + ) + cast_value_params = ( + (VariableLengthBytes(), "", b""), + (VariableLengthBytes(), "ab", b"ab"), + (VariableLengthBytes(), "abcdefg", b"abcdefg"), + ) + item_size_params = ( + VariableLengthBytes(), + VariableLengthBytes(), + VariableLengthBytes(), + ) diff --git a/tests/test_dtype/test_npy/test_string.py b/tests/test_dtype/test_npy/test_string.py index 73c8612db4..66a8d8d1ff 100644 --- a/tests/test_dtype/test_npy/test_string.py +++ b/tests/test_dtype/test_npy/test_string.py @@ -3,13 +3,13 @@ import numpy as np from tests.test_dtype.test_wrapper import 
BaseTestZDType, V2JsonTestParams -from zarr.core.dtype import FixedLengthASCII, FixedLengthUTF32 -from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING, VariableLengthString +from zarr.core.dtype import FixedLengthUTF32 +from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING, VariableLengthUTF8 if _NUMPY_SUPPORTS_VLEN_STRING: class TestVariableLengthString(BaseTestZDType): - test_cls = VariableLengthString # type: ignore[assignment] + test_cls = VariableLengthUTF8 # type: ignore[assignment] valid_dtype = (np.dtypes.StringDType(),) # type: ignore[assignment] invalid_dtype = ( np.dtype(np.int8), @@ -28,22 +28,22 @@ class TestVariableLengthString(BaseTestZDType): {"name": "invalid_name"}, ) - scalar_v2_params = ((VariableLengthString(), ""), (VariableLengthString(), "hi")) + scalar_v2_params = ((VariableLengthUTF8(), ""), (VariableLengthUTF8(), "hi")) scalar_v3_params = ( - (VariableLengthString(), ""), - (VariableLengthString(), "hi"), + (VariableLengthUTF8(), ""), + (VariableLengthUTF8(), "hi"), ) cast_value_params = ( - (VariableLengthString(), "", np.str_("")), - (VariableLengthString(), "hi", np.str_("hi")), + (VariableLengthUTF8(), "", np.str_("")), + (VariableLengthUTF8(), "hi", np.str_("hi")), ) - item_size_params = (VariableLengthString(),) + item_size_params = (VariableLengthUTF8(),) else: class TestVariableLengthString(BaseTestZDType): # type: ignore[no-redef] - test_cls = VariableLengthString # type: ignore[assignment] + test_cls = VariableLengthUTF8 # type: ignore[assignment] valid_dtype = (np.dtype("O"),) invalid_dtype = ( np.dtype(np.int8), @@ -62,64 +62,18 @@ class TestVariableLengthString(BaseTestZDType): # type: ignore[no-redef] {"name": "invalid_name"}, ) - scalar_v2_params = ((VariableLengthString(), ""), (VariableLengthString(), "hi")) + scalar_v2_params = ((VariableLengthUTF8(), ""), (VariableLengthUTF8(), "hi")) scalar_v3_params = ( - (VariableLengthString(), ""), - (VariableLengthString(), "hi"), + 
(VariableLengthUTF8(), ""), + (VariableLengthUTF8(), "hi"), ) cast_value_params = ( - (VariableLengthString(), "", np.str_("")), - (VariableLengthString(), "hi", np.str_("hi")), + (VariableLengthUTF8(), "", np.str_("")), + (VariableLengthUTF8(), "hi", np.str_("hi")), ) - item_size_params = (VariableLengthString(),) - - -class TestFixedLengthAscii(BaseTestZDType): - test_cls = FixedLengthASCII - valid_dtype = (np.dtype("|S10"), np.dtype("|S4")) - invalid_dtype = ( - np.dtype(np.int8), - np.dtype(np.float64), - np.dtype("|U10"), - ) - valid_json_v2 = ( - V2JsonTestParams(dtype="|S0"), - V2JsonTestParams(dtype="|S2"), - V2JsonTestParams(dtype="|S4"), - ) - valid_json_v3 = ({"name": "fixed_length_ascii", "configuration": {"length_bytes": 10}},) - invalid_json_v2 = ( - "|S", - "|U10", - "|f8", - ) - invalid_json_v3 = ( - {"name": "fixed_length_ascii", "configuration": {"length_bits": 0}}, - {"name": "numpy.fixed_length_ascii", "configuration": {"length_bits": "invalid"}}, - ) - - scalar_v2_params = ( - (FixedLengthASCII(length=0), ""), - (FixedLengthASCII(length=2), "YWI="), - (FixedLengthASCII(length=4), "YWJjZA=="), - ) - scalar_v3_params = ( - (FixedLengthASCII(length=0), ""), - (FixedLengthASCII(length=2), "YWI="), - (FixedLengthASCII(length=4), "YWJjZA=="), - ) - cast_value_params = ( - (FixedLengthASCII(length=0), "", np.bytes_("")), - (FixedLengthASCII(length=2), "ab", np.bytes_("ab")), - (FixedLengthASCII(length=4), "abcd", np.bytes_("abcd")), - ) - item_size_params = ( - FixedLengthASCII(length=0), - FixedLengthASCII(length=4), - FixedLengthASCII(length=10), - ) + item_size_params = (VariableLengthUTF8(),) class TestFixedLengthUTF32(BaseTestZDType): diff --git a/tests/test_dtype/test_npy/test_sized.py b/tests/test_dtype/test_npy/test_structured.py similarity index 71% rename from tests/test_dtype/test_npy/test_sized.py rename to tests/test_dtype/test_npy/test_structured.py index d7aef88168..71bbcdcefb 100644 --- a/tests/test_dtype/test_npy/test_sized.py +++ 
b/tests/test_dtype/test_npy/test_structured.py @@ -6,7 +6,6 @@ from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams from zarr.core.dtype import ( - FixedLengthBytes, Float16, Float64, Int32, @@ -15,52 +14,6 @@ ) -class TestFixedLengthBytes(BaseTestZDType): - test_cls = FixedLengthBytes - valid_dtype = (np.dtype("|V10"),) - invalid_dtype = ( - np.dtype(np.int8), - np.dtype(np.float64), - np.dtype("|S10"), - ) - valid_json_v2 = (V2JsonTestParams(dtype="|V10"),) - valid_json_v3 = ( - {"name": "fixed_length_bytes", "configuration": {"length_bytes": 0}}, - {"name": "fixed_length_bytes", "configuration": {"length_bytes": 8}}, - ) - - invalid_json_v2 = ( - "|V", - "|S10", - "|f8", - ) - invalid_json_v3 = ( - {"name": "r10"}, - {"name": "r-80"}, - ) - - scalar_v2_params = ( - (FixedLengthBytes(length=0), ""), - (FixedLengthBytes(length=2), "YWI="), - (FixedLengthBytes(length=4), "YWJjZA=="), - ) - scalar_v3_params = ( - (FixedLengthBytes(length=0), ""), - (FixedLengthBytes(length=2), "YWI="), - (FixedLengthBytes(length=4), "YWJjZA=="), - ) - cast_value_params = ( - (FixedLengthBytes(length=0), b"", np.void(b"")), - (FixedLengthBytes(length=2), b"ab", np.void(b"ab")), - (FixedLengthBytes(length=4), b"abcd", np.void(b"abcd")), - ) - item_size_params = ( - FixedLengthBytes(length=0), - FixedLengthBytes(length=4), - FixedLengthBytes(length=10), - ) - - class TestStructured(BaseTestZDType): test_cls = Structured valid_dtype = ( diff --git a/tests/test_dtype/test_wrapper.py b/tests/test_dtype/test_wrapper.py index 0c3a2b106f..d359475a0d 100644 --- a/tests/test_dtype/test_wrapper.py +++ b/tests/test_dtype/test_wrapper.py @@ -99,10 +99,10 @@ def scalar_equals(self, scalar1: object, scalar2: object) -> bool: return scalar1 == scalar2 def test_check_dtype_valid(self, valid_dtype: TBaseDType) -> None: - assert self.test_cls.check_native_dtype(valid_dtype) + assert self.test_cls._check_native_dtype(valid_dtype) def test_check_dtype_invalid(self, invalid_dtype: 
object) -> None: - assert not self.test_cls.check_native_dtype(invalid_dtype) # type: ignore[arg-type] + assert not self.test_cls._check_native_dtype(invalid_dtype) # type: ignore[arg-type] def test_from_dtype_roundtrip(self, valid_dtype: Any) -> None: zdtype = self.test_cls.from_native_dtype(valid_dtype) diff --git a/tests/test_properties.py b/tests/test_properties.py index ed8aa997c0..b8d50ef0b1 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -75,6 +75,7 @@ def deep_equal(a: Any, b: Any) -> bool: return a == b +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @given(data=st.data(), zarr_format=zarr_formats) def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None: nparray = data.draw(numpy_arrays(zarr_formats=st.just(zarr_format))) @@ -82,6 +83,7 @@ def test_array_roundtrip(data: st.DataObject, zarr_format: int) -> None: assert_array_equal(nparray, zarray[:]) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @given(array=arrays()) def test_array_creates_implicit_groups(array): path = array.path @@ -101,7 +103,10 @@ def test_array_creates_implicit_groups(array): # this decorator removes timeout; not ideal but it should avoid intermittent CI failures + + @settings(deadline=None) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @given(data=st.data()) def test_basic_indexing(data: st.DataObject) -> None: zarray = data.draw(simple_arrays()) @@ -117,6 +122,7 @@ def test_basic_indexing(data: st.DataObject) -> None: @given(data=st.data()) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_oindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. 
zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) @@ -138,6 +144,7 @@ def test_oindex(data: st.DataObject) -> None: @given(data=st.data()) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_vindex(data: st.DataObject) -> None: # integer_array_indices can't handle 0-size dimensions. zarray = data.draw(simple_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))) @@ -161,6 +168,7 @@ def test_vindex(data: st.DataObject) -> None: @given(store=stores, meta=array_metadata()) # type: ignore[misc] +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") async def test_roundtrip_array_metadata_from_store( store: Store, meta: ArrayV2Metadata | ArrayV3Metadata ) -> None: @@ -180,6 +188,7 @@ async def test_roundtrip_array_metadata_from_store( @given(data=st.data(), zarr_format=zarr_formats) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_roundtrip_array_metadata_from_json(data: st.DataObject, zarr_format: int) -> None: """ Verify that JSON serialization and deserialization of metadata is lossless. @@ -281,6 +290,7 @@ def serialized_float_is_valid(serialized: numbers.Real | str) -> bool: @given(meta=array_metadata()) # type: ignore[misc] +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_array_metadata_meets_spec(meta: ArrayV2Metadata | ArrayV3Metadata) -> None: """ Validate that the array metadata produced by the library conforms to the relevant spec (V2 vs V3). 
diff --git a/tests/test_regression/test_regression.py b/tests/test_regression/test_regression.py index a1d13510c3..34c48a6933 100644 --- a/tests/test_regression/test_regression.py +++ b/tests/test_regression/test_regression.py @@ -12,8 +12,8 @@ import zarr from zarr.core.array import Array from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding -from zarr.core.dtype.npy.string import VariableLengthString -from zarr.core.dtype.npy.vlen_bytes import VariableLengthBytes +from zarr.core.dtype.npy.bytes import VariableLengthBytes +from zarr.core.dtype.npy.string import VariableLengthUTF8 from zarr.storage import LocalStore if TYPE_CHECKING: @@ -42,7 +42,8 @@ class ArrayParams: basic_codecs = GZip(), Blosc(), LZ4(), LZMA(), Zstd() basic_dtypes = "|b", ">i2", ">i4", ">f4", ">f8", "c8", "c16", "M8[10us]", "m8[4ps]" -string_dtypes = ">S1", "U4" +string_dtypes = "U4" +bytes_dtypes = ">S1", "V10", " Array: chunk_key_encoding = V2ChunkKeyEncoding(separator="/") dtype: ZDTypeLike if array_params.values.dtype == np.dtype("|O") and array_params.filters == (VLenUTF8(),): - dtype = VariableLengthString() # type: ignore[assignment] + dtype = VariableLengthUTF8() # type: ignore[assignment] elif array_params.values.dtype == np.dtype("|O") and array_params.filters == (VLenBytes(),): dtype = VariableLengthBytes() else: diff --git a/tests/test_v2.py b/tests/test_v2.py index fa2aa65b22..66e5a1ecfb 100644 --- a/tests/test_v2.py +++ b/tests/test_v2.py @@ -15,7 +15,8 @@ from zarr import config from zarr.abc.store import Store from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.dtype import FixedLengthASCII, FixedLengthUTF32, Structured, VariableLengthString +from zarr.core.dtype import FixedLengthUTF32, Structured, VariableLengthUTF8 +from zarr.core.dtype.npy.bytes import NullTerminatedBytes from zarr.core.dtype.wrapper import ZDType from zarr.core.sync import sync from zarr.storage import MemoryStore, StorePath @@ -105,9 +106,9 @@ async def 
test_v2_encode_decode(dtype, expected_dtype, fill_value, fill_value_js @pytest.mark.parametrize( ("dtype", "value"), [ - (FixedLengthASCII(length=1), b"Y"), + (NullTerminatedBytes(length=1), b"Y"), (FixedLengthUTF32(length=1), "Y"), - (VariableLengthString(), "Y"), + (VariableLengthUTF8(), "Y"), ], ) def test_v2_encode_decode_with_data(dtype: ZDType[Any, Any], value: str): From 24b6b356a0b0164e61fe6775c584810fb6e59d4b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 6 Jun 2025 20:38:06 +0300 Subject: [PATCH 122/130] more v3 unstable dtype warnings, and their exemptions from tests --- src/zarr/core/dtype/common.py | 4 ++-- src/zarr/core/dtype/npy/bytes.py | 1 + src/zarr/core/dtype/npy/string.py | 1 + tests/test_array.py | 1 + tests/test_config.py | 1 + tests/test_dtype/test_npy/test_bytes.py | 16 ++++++++++++++++ tests/test_dtype/test_npy/test_string.py | 12 ++++++++++++ tests/test_store/test_stateful.py | 1 + 8 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index bbdc06c50d..5630f1692e 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -77,9 +77,9 @@ def v3_unstable_dtype_warning(dtype: object) -> None: """ msg = ( f"The data type ({dtype}) does not have a Zarr V3 specification. " - "That means that the representation of data saved with this data type may change without " + "That means that the representation of array saved with this data type may change without " "warning in a future version of Zarr Python. " - "Arrays stored with this data type may be unreadable by other Zarr libraries " + "Arrays stored with this data type may be unreadable by other Zarr libraries. " "Use this data type at your own risk! " "Check https://github.com/zarr-developers/zarr-extensions/tree/main/data-types for the " "status of data type specifications for Zarr V3." 
diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py index 9d815ab849..347e058f12 100644 --- a/src/zarr/core/dtype/npy/bytes.py +++ b/src/zarr/core/dtype/npy/bytes.py @@ -154,6 +154,7 @@ def to_json(self, zarr_format: ZarrFormat) -> str | RawBytesJSONV3: if zarr_format == 2: return self.to_native_dtype().str elif zarr_format == 3: + v3_unstable_dtype_warning(self) return {"name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}} raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index 853f32806d..21727b0c8c 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -259,6 +259,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf if zarr_format == 2: return "|O" elif zarr_format == 3: + v3_unstable_dtype_warning(self) return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/tests/test_array.py b/tests/test_array.py index e300b70f8d..862b49da61 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1300,6 +1300,7 @@ async def test_v2_chunk_encoding( @staticmethod @pytest.mark.parametrize("dtype", [UInt8(), Float32(), VariableLengthUTF8()]) + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") async def test_default_filters_compressors( store: MemoryStore, dtype: UInt8 | Float32 | VariableLengthUTF8, zarr_format: ZarrFormat ) -> None: diff --git a/tests/test_config.py b/tests/test_config.py index f02bb153e4..1dc6f8bf4f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -306,6 +306,7 @@ class NewCodec2(BytesCodec): @pytest.mark.parametrize("dtype_category", ["variable-length-string", "default"]) +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") async def 
test_default_codecs(dtype_category: str) -> None: """ Test that the default compressors are sensitive to the current setting of the config. diff --git a/tests/test_dtype/test_npy/test_bytes.py b/tests/test_dtype/test_npy/test_bytes.py index fcb43e551b..53636891cb 100644 --- a/tests/test_dtype/test_npy/test_bytes.py +++ b/tests/test_dtype/test_npy/test_bytes.py @@ -1,6 +1,8 @@ import numpy as np +import pytest from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams +from zarr.core.dtype.common import UnstableSpecificationWarning from zarr.core.dtype.npy.bytes import NullTerminatedBytes, RawBytes, VariableLengthBytes @@ -136,3 +138,17 @@ class TestVariableLengthBytes(BaseTestZDType): VariableLengthBytes(), VariableLengthBytes(), ) + + +@pytest.mark.parametrize( + "zdtype", [NullTerminatedBytes(length=10), RawBytes(length=10), VariableLengthBytes()] +) +def test_unstable_dtype_warning( + zdtype: NullTerminatedBytes | RawBytes | VariableLengthBytes, +) -> None: + """ + Test that we get a warning when serializing a dtype without a zarr v3 spec to json + when zarr_format is 3 + """ + with pytest.raises(UnstableSpecificationWarning): + zdtype.to_json(zarr_format=3) diff --git a/tests/test_dtype/test_npy/test_string.py b/tests/test_dtype/test_npy/test_string.py index 66a8d8d1ff..c9bcdce29f 100644 --- a/tests/test_dtype/test_npy/test_string.py +++ b/tests/test_dtype/test_npy/test_string.py @@ -1,9 +1,11 @@ from __future__ import annotations import numpy as np +import pytest from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams from zarr.core.dtype import FixedLengthUTF32 +from zarr.core.dtype.common import UnstableSpecificationWarning from zarr.core.dtype.npy.string import _NUMPY_SUPPORTS_VLEN_STRING, VariableLengthUTF8 if _NUMPY_SUPPORTS_VLEN_STRING: @@ -113,3 +115,13 @@ class TestFixedLengthUTF32(BaseTestZDType): FixedLengthUTF32(length=4), FixedLengthUTF32(length=10), ) + + +@pytest.mark.parametrize("zdtype", 
[FixedLengthUTF32(length=10), VariableLengthUTF8()]) +def test_unstable_dtype_warning(zdtype: FixedLengthUTF32 | VariableLengthUTF8) -> None: + """ + Test that we get a warning when serializing a dtype without a zarr v3 spec to json + when zarr_format is 3 + """ + with pytest.raises(UnstableSpecificationWarning): + zdtype.to_json(zarr_format=3) diff --git a/tests/test_store/test_stateful.py b/tests/test_store/test_stateful.py index a17d7a55be..c0997c3df3 100644 --- a/tests/test_store/test_stateful.py +++ b/tests/test_store/test_stateful.py @@ -15,6 +15,7 @@ ] +@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") def test_zarr_hierarchy(sync_store: Store): def mk_test_instance_sync() -> ZarrHierarchyStateMachine: return ZarrHierarchyStateMachine(sync_store) From cbb0b0df980bb9575fda897c928d057c17b3fba6 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sat, 7 Jun 2025 14:15:53 +0300 Subject: [PATCH 123/130] clean up typeddicts --- src/zarr/core/dtype/npy/bytes.py | 8 ++++---- src/zarr/core/dtype/npy/string.py | 4 ---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py index 347e058f12..d489f2f610 100644 --- a/src/zarr/core/dtype/npy/bytes.py +++ b/src/zarr/core/dtype/npy/bytes.py @@ -15,7 +15,7 @@ class FixedLengthBytesConfig(TypedDict): length_bytes: int -NTBytesJSONV3 = NamedConfig[Literal["null_terminated_bytes"], FixedLengthBytesConfig] +NullTerminatedBytesJSONV3 = NamedConfig[Literal["null_terminated_bytes"], FixedLengthBytesConfig] RawBytesJSONV3 = NamedConfig[Literal["raw_bytes"], FixedLengthBytesConfig] @@ -40,7 +40,7 @@ def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Ty return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[NTBytesJSONV3]: + def _check_json_v3(cls, data: JSON) -> TypeGuard[NullTerminatedBytesJSONV3]: return ( 
isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -53,9 +53,9 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[NTBytesJSONV3]: def to_json(self, zarr_format: Literal[2]) -> str: ... @overload - def to_json(self, zarr_format: Literal[3]) -> NTBytesJSONV3: ... + def to_json(self, zarr_format: Literal[3]) -> NullTerminatedBytesJSONV3: ... - def to_json(self, zarr_format: ZarrFormat) -> str | NTBytesJSONV3: + def to_json(self, zarr_format: ZarrFormat) -> str | NullTerminatedBytesJSONV3: if zarr_format == 2: return self.to_native_dtype().str elif zarr_format == 3: diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index 21727b0c8c..377c364ca2 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -33,10 +33,6 @@ class LengthBytesConfig(TypedDict): length_bytes: int -# TDO: Fix this terrible name -FixedLengthBytesJSONV3 = NamedConfig[Literal["fixed_length_bytes"], LengthBytesConfig] - - # TODO: Fix this terrible name FixedLengthUTF32JSONV3 = NamedConfig[Literal["fixed_length_utf32"], LengthBytesConfig] From e8858697d2fb2d7e41065e7b0ccb53d77fab38bb Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 9 Jun 2025 12:46:07 +0300 Subject: [PATCH 124/130] update docstrings --- src/zarr/core/dtype/wrapper.py | 91 ++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 42 deletions(-) diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index b117656c36..94fbe60242 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -1,8 +1,8 @@ """ Wrapper for native array data types. -The `ZDType` class is an abstract base class for wrapping native array data types, e.g. numpy dtypes. -It provides a common interface for working with data types in a way that is independent of the +The ``ZDType`` class is an abstract base class for wrapping native array data types, e.g. NumPy dtypes. 
+``ZDType`` provides a common interface for working with data types in a way that is independent of the underlying data type system. The wrapper class encapsulates a native data type. Instances of the class can be created from a @@ -10,14 +10,15 @@ wrapper class. The wrapper class is responsible for: -- Reversibly serializing a native data type to Zarr V2 or Zarr V3 metadata. +- Serializing and deserializing a native data type to Zarr V2 or Zarr V3 metadata. This ensures that the data type can be properly stored and retrieved from array metadata. -- Reversibly serializing scalar values to Zarr V2 or Zarr V3 metadata. This is important for +- Serializing and deserializing scalar values to Zarr V2 or Zarr V3 metadata. This is important for storing a fill value for an array in a manner that is valid for the data type. -To add support for a new data type in Zarr, you should subclass the wrapper class and adapt its methods +You can add support for a new data type in Zarr by subclassing ``ZDType`` wrapper class and adapt its methods to support your native data type. The wrapper class must be added to a data type registry -(defined elsewhere) before ``create_array`` can properly handle the new data type. +(defined elsewhere) before array creation routines or array reading routines can use your new data +type. """ from __future__ import annotations @@ -69,11 +70,10 @@ class ZDType(Generic[TDType_co, TScalar_co], ABC): Attributes ---------- dtype_cls : ClassVar[type[TDType]] - The wrapped dtype class. This is a class variable. Instances of this class cannot set it. + The wrapped dtype class. This is a class variable. _zarr_v3_name : ClassVar[str] - The name given to the wrapped data type by a zarr v3 data type specification. Note that this - is not necessarily the same name that will appear in metadata documents, as some data types - have names that depend on their configuration. + The name given to the data type by a Zarr v3 data type specification. 
This is a + class variable, and it should generally be unique across different data types. """ # this class will create a native data type @@ -86,7 +86,7 @@ class ZDType(Generic[TDType_co, TScalar_co], ABC): @classmethod def _check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_co]: """ - Check that a data type matches the dtype_cls class attribute. Used as a type guard. + Check that a native data type matches the dtype_cls class attribute. Used as a type guard. Parameters ---------- @@ -103,22 +103,28 @@ def _check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_ @classmethod def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: """ - Wrap a dtype object. + Create a ZDType instance from a native data type. The default implementation first performs + a type check via ``cls._check_native_dtype``. If that type check succeeds, then + ``cls._from_native_dtype_unchecked`` is called, which assumes that the incoming object + as all the properties necessary for instantiating the ZDType. + + This method is used when taking a user-provided native data type, like a NumPy data type, + and creating the corresponding ZDType instance from them. Parameters ---------- dtype : TDType - The dtype object to wrap. + The native data type object to wrap. Returns ------- Self - The wrapped dtype. + The ZDType that wraps the native data type. Raises ------ TypeError - If the dtype does not match the dtype_cls class attribute. + If the native data type is not consistent with the wrapped data type. """ if cls._check_native_dtype(dtype): return cls._from_native_dtype_unchecked(dtype) @@ -130,7 +136,8 @@ def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: @abstractmethod def _from_native_dtype_unchecked(cls: type[Self], dtype: TBaseDType) -> Self: """ - Wrap a native dtype without checking. + Create a ZDType instance from a native data type without performing any type checking of + that data type. 
Parameters ---------- @@ -140,19 +147,19 @@ def _from_native_dtype_unchecked(cls: type[Self], dtype: TBaseDType) -> Self: Returns ------- Self - The wrapped dtype. + A ZDType that wraps the native dtype. """ ... @abstractmethod def to_native_dtype(self: Self) -> TDType_co: """ - Return an instance of the wrapped dtype. + Return an instance of the wrapped data type. This operation inverts ``from_native_dtype``. Returns ------- TDType - The unwrapped dtype. + The native data type wrapped by this ZDType. """ ... @@ -220,10 +227,10 @@ def _cast_scalar_unchecked(self, data: object) -> TScalar_co: @abstractmethod def default_scalar(self) -> TScalar_co: """ - Get the default scalar value for the wrapped data type. This is a method, rather than an attribute, - because the default value for some data types may depend on parameters that are not known - until a concrete data type is wrapped. For example, data types parametrized by a length like - fixed-length strings or bytes will generate scalars consistent with that length. + Get the default scalar value for the wrapped data type. This is a method, rather than an + attribute, because the default value for some data types depends on parameters that are + not known until a concrete data type is wrapped. For example, data types parametrized by a + length like fixed-length strings or bytes will generate scalars consistent with that length. Returns ------- @@ -238,7 +245,7 @@ def _check_json_v2( cls: type[Self], data: JSON, *, object_codec_id: str | None = None ) -> TypeGuard[DTypeJSON_V2]: """ - Check that a JSON representation of a data type is consistent with the ZDType class. + Check that JSON data matches the Zarr V2 JSON serialization of this ZDType. Parameters ---------- @@ -246,17 +253,17 @@ def _check_json_v2( The JSON representation of the data type. object_codec_id : str | None - The object codec ID, if applicable. 
Object codecs are specific numcodecs codecs that - zarr-python 2.x used to serialize numpy "Object" scalars. For example, a dtype field set - to ``"|O"`` with an object codec ID of "vlen-utf8" indicates that the data type is a - variable-length string. + The string identifier of an object codec, if applicable. Object codecs are specific + numcodecs codecs that zarr-python 2.x used to serialize numpy "Object" scalars. + For example, a dtype field set to ``"|O"`` with an object codec ID of "vlen-utf8" + indicates that the data type is a variable-length string. Zarr V3 has no such logic, so this parameter is only used for Zarr V2 compatibility. Returns ------- Bool - True if the JSON representation matches, False otherwise. + True if the JSON representation matches this data type, False otherwise. """ ... @@ -264,10 +271,7 @@ def _check_json_v2( @abstractmethod def _check_json_v3(cls: type[Self], data: JSON) -> TypeGuard[DTypeJSON_V3]: """ - Check that a JSON representation of a data type matches the dtype_cls class attribute. Used - as a type guard. This base implementation checks that the input is a dictionary, - that the key "name" is in that dictionary, and that the value of "name" - matches the _zarr_v3_name class attribute. + Check that JSON data matches the Zarr V3 JSON serialization of this ZDType. Parameters ---------- @@ -290,7 +294,7 @@ def to_json(self, zarr_format: Literal[3]) -> DTypeJSON_V3: ... @abstractmethod def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V2 | DTypeJSON_V3: """ - Convert the wrapped data type to a JSON-serializable form. + Serialize this ZDType to JSON. Parameters ---------- @@ -307,7 +311,7 @@ def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V2 | DTypeJSON_V3: @classmethod def from_json_v3(cls: type[Self], data: JSON) -> Self: """ - Wrap a Zarr V3 JSON representation of a data type. + Create an instance of this ZDType from Zarr V3 JSON data. 
Parameters ---------- @@ -326,7 +330,7 @@ def from_json_v3(cls: type[Self], data: JSON) -> Self: @classmethod def from_json_v2(cls: type[Self], data: JSON, *, object_codec_id: str | None) -> Self: """ - Wrap a Zarr V2 JSON representation of a data type. + Create an instance of this ZDType from Zarr V2 JSON data. Parameters ---------- @@ -377,19 +381,21 @@ def _from_json_unchecked( @abstractmethod def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> JSON: """ - Convert a single value to JSON-serializable format. + Serialize a python object to the JSON representation of a scalar. The value will first be + cast to the scalar type associated with this ZDType, then serialized to JSON. Parameters ---------- data : object The value to convert. zarr_format : ZarrFormat - The zarr format version. + The zarr format version. This is specified because the JSON serialization of scalars + differs between Zarr V2 and Zarr V3. Returns ------- JSON - The JSON-serializable form of the scalar. + The JSON-serialized scalar. """ ... @@ -401,13 +407,14 @@ def from_json_scalar(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TSca Parameters ---------- data : JSON - The JSON-serializable value. + A JSON representation of a scalar value. zarr_format : ZarrFormat - The zarr format version. + The zarr format version. This is specified because the JSON serialization of scalars + differs between Zarr V2 and Zarr V3. Returns ------- TScalar - The native scalar value. + The deserialized scalar value. """ ... 
From 63de7c492c0cfd1b0d9ffe6b896be6a081fa0e68 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 11 Jun 2025 19:56:08 +0300 Subject: [PATCH 125/130] Update docs/user-guide/data_types.rst Co-authored-by: Ryan Abernathey --- docs/user-guide/data_types.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index c101ae50fc..0150e025e3 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -83,9 +83,9 @@ Zarr V3 brings several key changes to how data types are represented: { "name": "numpy.datetime64", "configuration": { - "unit": "s", - "scale_factor": 10 - } + "unit": "s", + "scale_factor": 10 + } } From b069d3684789ae67f60f5603e3d69e51792a2bcd Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 13 Jun 2025 18:53:56 +0300 Subject: [PATCH 126/130] refactor wrapper to allow subclasses to freely define their own type guards for native dtype and json input --- src/zarr/codecs/bytes.py | 15 +- src/zarr/core/dtype/common.py | 8 +- src/zarr/core/dtype/npy/bool.py | 58 +++-- src/zarr/core/dtype/npy/bytes.py | 126 +++++++--- src/zarr/core/dtype/npy/common.py | 65 +++-- src/zarr/core/dtype/npy/complex.py | 58 +++-- src/zarr/core/dtype/npy/float.py | 51 +++- src/zarr/core/dtype/npy/int.py | 298 ++++++++++++++-------- src/zarr/core/dtype/npy/string.py | 308 ++++++++++++----------- src/zarr/core/dtype/npy/structured.py | 138 +++++----- src/zarr/core/dtype/npy/time.py | 157 ++++++++---- src/zarr/core/dtype/wrapper.py | 116 ++------- tests/test_array.py | 14 +- tests/test_dtype/test_npy/test_common.py | 12 +- 14 files changed, 835 insertions(+), 589 deletions(-) diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 6ef0fef60b..d663a3b2cc 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -3,7 +3,7 @@ import sys from dataclasses import dataclass, replace from enum import Enum -from typing import TYPE_CHECKING, cast 
+from typing import TYPE_CHECKING import numpy as np @@ -11,14 +11,12 @@ from zarr.core.buffer import Buffer, NDArrayLike, NDBuffer from zarr.core.common import JSON, parse_enum, parse_named_configuration from zarr.core.dtype.common import HasEndianness -from zarr.core.dtype.npy.common import endianness_to_numpy_str from zarr.registry import register_codec if TYPE_CHECKING: from typing import Self from zarr.core.array_spec import ArraySpec - from zarr.core.dtype.common import Endianness class Endian(Enum): @@ -75,12 +73,11 @@ async def _decode_single( ) -> NDBuffer: assert isinstance(chunk_bytes, Buffer) # TODO: remove endianness enum in favor of literal union - endian_str = cast( - "Endianness | None", self.endian.value if self.endian is not None else None - ) - new_byte_order = endianness_to_numpy_str(endian_str) - dtype = chunk_spec.dtype.to_native_dtype().newbyteorder(new_byte_order) - + endian_str = self.endian.value if self.endian is not None else None + if isinstance(chunk_spec.dtype, HasEndianness): + dtype = replace(chunk_spec.dtype, endianness=endian_str).to_native_dtype() # type: ignore[call-arg] + else: + dtype = chunk_spec.dtype.to_native_dtype() as_array_like = chunk_bytes.as_array_like() if isinstance(as_array_like, NDArrayLike): as_nd_array_like = as_array_like diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 5630f1692e..9fabfa2737 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -4,7 +4,8 @@ from dataclasses import dataclass from typing import ClassVar, Final, Literal -Endianness = Literal["little", "big"] +EndiannessStr = Literal["little", "big"] +ENDIANNESS_STR: Final = "little", "big" SpecialFloatStrings = Literal["NaN", "Infinity", "-Infinity"] SPECIAL_FLOAT_STRINGS: Final = ("NaN", "Infinity", "-Infinity") JSONFloatV2 = float | SpecialFloatStrings @@ -14,6 +15,9 @@ class DataTypeValidationError(ValueError): ... +class ScalarTypeValidationError(ValueError): ... 
+ + @dataclass(frozen=True) class HasLength: """ @@ -30,7 +34,7 @@ class HasEndianness: A mix-in class for data types with an endianness attribute """ - endianness: Endianness | None = "little" + endianness: EndiannessStr = "little" @dataclass(frozen=True) diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index bee42b6a13..2d045ce28a 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -4,9 +4,8 @@ import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import HasItemSize -from zarr.core.dtype.npy.common import check_json_bool -from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType +from zarr.core.dtype.common import DataTypeValidationError, HasItemSize +from zarr.core.dtype.wrapper import TBaseDType, ZDType @dataclass(frozen=True, kw_only=True, slots=True) @@ -23,14 +22,24 @@ class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): """ _zarr_v3_name: ClassVar[Literal["bool"]] = "bool" - _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|b1",) + _zarr_v2_name: ClassVar[Literal["|b1"]] = "|b1" dtype_cls = np.dtypes.BoolDType @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - return cls() + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + """ + Create a Bool from a np.dtype('bool') instance. + """ + if cls._check_native_dtype(dtype): + return cls() + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self: Self) -> np.dtypes.BoolDType: + """ + Create a NumPy boolean dtype instance from this ZDType + """ return self.dtype_cls() @classmethod @@ -38,14 +47,28 @@ def _check_json_v2( cls, data: JSON, *, object_codec_id: str | None = None ) -> TypeGuard[Literal["|b1"]]: """ - Check that the input is a valid JSON representation of a bool. + Check that the input is a valid JSON representation of a Bool. 
""" - return data in cls._zarr_v2_names + return data == cls._zarr_v2_name @classmethod def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["bool"]]: return data == cls._zarr_v3_name + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data): + return cls() + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v2_name!r}" + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls: type[Self], data: JSON) -> Self: + if cls._check_json_v3(data): + return cls() + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) + @overload def to_json(self, zarr_format: Literal[2]) -> Literal["|b1"]: ... @@ -54,17 +77,11 @@ def to_json(self, zarr_format: Literal[3]) -> Literal["bool"]: ... def to_json(self, zarr_format: ZarrFormat) -> Literal["|b1", "bool"]: if zarr_format == 2: - return self.to_native_dtype().str + return self._zarr_v2_name elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - return cls() - def default_scalar(self) -> np.bool_: """ Get the default value for the boolean dtype. @@ -110,16 +127,19 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: np.bool_ The numpy boolean scalar. """ - if check_json_bool(data): - return self._cast_scalar_unchecked(data) + if self._check_scalar(data): + return np.bool_(data) raise TypeError(f"Invalid type: {data}. 
Expected a boolean.") # pragma: no cover def _check_scalar(self, data: object) -> bool: # Anything can become a bool return True - def _cast_scalar_unchecked(self, data: object) -> np.bool_: - return np.bool_(data) + def cast_scalar(self, data: object) -> np.bool_: + if self._check_scalar(data): + return np.bool_(data) + msg = f"Cannot convert object with type {type(data)} to a numpy boolean." + raise TypeError(msg) @property def item_size(self) -> int: diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py index d489f2f610..d98114e9e1 100644 --- a/src/zarr/core/dtype/npy/bytes.py +++ b/src/zarr/core/dtype/npy/bytes.py @@ -6,10 +6,18 @@ import numpy as np from zarr.core.common import JSON, NamedConfig, ZarrFormat -from zarr.core.dtype.common import HasItemSize, HasLength, HasObjectCodec, v3_unstable_dtype_warning +from zarr.core.dtype.common import ( + DataTypeValidationError, + HasItemSize, + HasLength, + HasObjectCodec, + v3_unstable_dtype_warning, +) from zarr.core.dtype.npy.common import check_json_str from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType +BytesLike = np.bytes_ | str | bytes | int + class FixedLengthBytesConfig(TypedDict): length_bytes: int @@ -25,8 +33,12 @@ class NullTerminatedBytes(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLengt _zarr_v3_name: ClassVar[Literal["null_terminated_bytes"]] = "null_terminated_bytes" @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - return cls(length=dtype.itemsize) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls(length=dtype.itemsize) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> np.dtypes.BytesDType[int]: return self.dtype_cls(self.length) @@ -49,6 +61,20 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[NullTerminatedBytesJSONV3]: and "length_bytes" in data["configuration"] ) + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data): + return cls(length=int(data[2:])) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string like '|S1', '|S2', etc" + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls(length=data["configuration"]["length_bytes"]) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) + @overload def to_json(self, zarr_format: Literal[2]) -> str: ... @@ -90,11 +116,11 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: f"Invalid type: {data}. Expected a base64-encoded string." 
) # pragma: no cover - def _check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> TypeGuard[BytesLike]: # this is generous for backwards compatibility - return isinstance(data, np.bytes_ | str | bytes | int) + return isinstance(data, BytesLike) - def _cast_scalar_unchecked(self, data: object) -> np.bytes_: + def _cast_scalar_unchecked(self, data: BytesLike) -> np.bytes_: # We explicitly truncate the result because of the following numpy behavior: # >>> x = np.dtype('S3').type('hello world') # >>> x @@ -105,7 +131,13 @@ def _cast_scalar_unchecked(self, data: object) -> np.bytes_: if isinstance(data, int): return self.to_native_dtype().type(str(data)[: self.length]) else: - return self.to_native_dtype().type(data[: self.length]) # type: ignore[index] + return self.to_native_dtype().type(data[: self.length]) + + def cast_scalar(self, data: object) -> np.bytes_: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy bytes scalar." + raise TypeError(msg) @property def item_size(self) -> int: @@ -121,8 +153,12 @@ class RawBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize _zarr_v3_name: ClassVar[Literal["raw_bytes"]] = "raw_bytes" @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - return cls(length=dtype.itemsize) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls(length=dtype.itemsize) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" # type: ignore[has-type] + ) def to_native_dtype(self) -> np.dtypes.VoidDType[int]: # Numpy does not allow creating a void type @@ -144,6 +180,20 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[RawBytesJSONV3]: and set(data["configuration"].keys()) == {"length_bytes"} ) + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data): + return cls(length=int(data[2:])) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string like '|V1', '|V2', etc" + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls(length=data["configuration"]["length_bytes"]) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) + @overload def to_json(self, zarr_format: Literal[2]) -> str: ... @@ -158,16 +208,6 @@ def to_json(self, zarr_format: ZarrFormat) -> str | RawBytesJSONV3: return {"name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}} raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"]) # type: ignore[index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _check_native_dtype( cls: type[Self], dtype: TBaseDType @@ -212,6 +252,12 @@ def _cast_scalar_unchecked(self, data: object) -> np.void: # the result will actually be a V10 scalar. 
return native_dtype.type(data, native_dtype) + def cast_scalar(self, data: object) -> np.void: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy void scalar." + raise TypeError(msg) + @property def item_size(self) -> int: return self.length @@ -224,8 +270,12 @@ class VariableLengthBytes(ZDType[np.dtypes.ObjectDType, bytes], HasObjectCodec): object_codec_id = "vlen-bytes" @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - return cls() + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls() + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> np.dtypes.ObjectDType: return self.dtype_cls() @@ -244,6 +294,20 @@ def _check_json_v2( def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_bytes"]]: return data == cls._zarr_v3_name + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + return cls() + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string '|O' and an object_codec_id of {cls.object_codec_id}" + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls() + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) + @overload def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... 
@@ -258,12 +322,6 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_byt return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - return cls() - def default_scalar(self) -> bytes: return b"" @@ -275,10 +333,16 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> bytes: return base64.standard_b64decode(data.encode("ascii")) raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover - def _check_scalar(self, data: object) -> bool: - return isinstance(data, bytes | str) + def _check_scalar(self, data: object) -> TypeGuard[BytesLike]: + return isinstance(data, BytesLike) - def _cast_scalar_unchecked(self, data: object) -> bytes: + def _cast_scalar_unchecked(self, data: BytesLike) -> bytes: if isinstance(data, str): return bytes(data, encoding="utf-8") - return bytes(data) # type: ignore[no-any-return, call-overload] + return bytes(data) + + def cast_scalar(self, data: object) -> bytes: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to bytes." 
+ raise TypeError(msg) diff --git a/src/zarr/core/dtype/npy/common.py b/src/zarr/core/dtype/npy/common.py index 03dc194a7a..264561f25c 100644 --- a/src/zarr/core/dtype/npy/common.py +++ b/src/zarr/core/dtype/npy/common.py @@ -7,6 +7,7 @@ from typing import ( TYPE_CHECKING, Any, + Final, Literal, SupportsComplex, SupportsFloat, @@ -14,12 +15,17 @@ SupportsInt, TypeGuard, TypeVar, - get_args, ) import numpy as np -from zarr.core.dtype.common import SPECIAL_FLOAT_STRINGS, Endianness, JSONFloatV2, JSONFloatV3 +from zarr.core.dtype.common import ( + ENDIANNESS_STR, + SPECIAL_FLOAT_STRINGS, + EndiannessStr, + JSONFloatV2, + JSONFloatV3, +) if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @@ -30,7 +36,26 @@ DateTimeUnit = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", "generic" ] -EndiannessNumpy = Literal[">", "<", "|", "="] +DATETIME_UNIT: Final = ( + "Y", + "M", + "W", + "D", + "h", + "m", + "s", + "ms", + "us", + "μs", + "ns", + "ps", + "fs", + "as", + "generic", +) + +NumpyEndiannessStr = Literal[">", "<", "="] +NUMPY_ENDIANNESS_STR: Final = ">", "<", "=" TFloatDType_co = TypeVar( "TFloatDType_co", @@ -47,18 +72,18 @@ TComplexScalar_co = TypeVar("TComplexScalar_co", bound=np.complex64 | np.complex128, covariant=True) -def endianness_from_numpy_str(endianness: EndiannessNumpy) -> Endianness | None: +def endianness_from_numpy_str(endianness: NumpyEndiannessStr) -> EndiannessStr: """ Convert a numpy endianness string literal to a human-readable literal value. Parameters ---------- - endianness : Literal[">", "<", "=", "|"] + endianness : Literal[">", "<", "="] The numpy string representation of the endianness. Returns ------- - Endianness or None + Endianness The human-readable representation of the endianness. 
Raises @@ -74,26 +99,21 @@ def endianness_from_numpy_str(endianness: EndiannessNumpy) -> Endianness | None: return "little" case ">": return "big" - case "|": - # for dtypes without byte ordering semantics - return None - raise ValueError( - f"Invalid endianness: {endianness!r}. Expected one of {get_args(EndiannessNumpy)}" - ) + raise ValueError(f"Invalid endianness: {endianness!r}. Expected one of {NUMPY_ENDIANNESS_STR}") -def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: +def endianness_to_numpy_str(endianness: EndiannessStr) -> NumpyEndiannessStr: """ Convert an endianness literal to its numpy string representation. Parameters ---------- - endianness : Endianness or None + endianness : Endianness The endianness to convert. Returns ------- - Literal[">", "<", "|"] + Literal[">", "<"] The numpy string representation of the endianness. Raises @@ -106,13 +126,22 @@ def endianness_to_numpy_str(endianness: Endianness | None) -> EndiannessNumpy: return "<" case "big": return ">" - case None: - return "|" raise ValueError( - f"Invalid endianness: {endianness!r}. Expected one of {get_args(Endianness)} or None" + f"Invalid endianness: {endianness!r}. Expected one of {ENDIANNESS_STR} or None" ) +def get_endianness_from_numpy_dtype(dtype: np.dtype[np.generic]) -> EndiannessStr: + """ + Gets the endianness from a numpy dtype that has an endianness. This function will + raise a ValueError if the numpy data type does not have a concrete endianness. + """ + endianness = dtype.byteorder + if dtype.byteorder in NUMPY_ENDIANNESS_STR: + return endianness_from_numpy_str(endianness) # type: ignore [arg-type] + raise ValueError(f"The dtype {dtype} has an unsupported endianness: {endianness}") + + def float_from_json_v2(data: JSONFloatV2) -> float: """ Convert a JSON float to a float (Zarr v2). 
diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index f68640e4ce..2df60f930b 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -1,17 +1,15 @@ from dataclasses import dataclass from typing import ( - TYPE_CHECKING, ClassVar, Literal, Self, TypeGuard, - cast, ) import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import HasEndianness, HasItemSize +from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasItemSize from zarr.core.dtype.npy.common import ( ComplexLike, TComplexDType_co, @@ -22,13 +20,10 @@ complex_float_from_json_v3, complex_float_to_json_v2, complex_float_to_json_v3, - endianness_from_numpy_str, endianness_to_numpy_str, + get_endianness_from_numpy_dtype, ) -from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType - -if TYPE_CHECKING: - from zarr.core.dtype.npy.common import EndiannessNumpy +from zarr.core.dtype.wrapper import TBaseDType, ZDType @dataclass(frozen=True) @@ -37,9 +32,12 @@ class BaseComplex(ZDType[TComplexDType_co, TComplexScalar_co], HasEndianness, Ha _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls(endianness=get_endianness_from_numpy_dtype(dtype)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> TComplexDType_co: byte_order = endianness_to_numpy_str(self.endianness) @@ -65,16 +63,6 @@ def to_json(self, zarr_format: ZarrFormat) -> str: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ @@ -86,11 +74,33 @@ def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Ty def _check_json_v3(cls, data: JSON) -> TypeGuard[str]: return data == cls._zarr_v3_name - def _check_scalar(self, data: object) -> bool: + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + # Going via numpy ensures that we get the endianness correct without + # annoying string parsing. + return cls.from_native_dtype(np.dtype(data)) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls() + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." 
+ raise DataTypeValidationError(msg) + + def _check_scalar(self, data: object) -> TypeGuard[ComplexLike]: return isinstance(data, ComplexLike) - def _cast_scalar_unchecked(self, data: object) -> TComplexScalar_co: - return self.to_native_dtype().type(data) # type: ignore[arg-type, return-value] + def cast_scalar(self, data: object) -> TComplexScalar_co: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy float scalar." + raise TypeError(msg) + + def _cast_scalar_unchecked(self, data: ComplexLike) -> TComplexScalar_co: + return self.to_native_dtype().type(data) # type: ignore[return-value] def default_scalar(self) -> TComplexScalar_co: """ diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index f87f032581..60a05326d5 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -1,23 +1,27 @@ from dataclasses import dataclass -from typing import ClassVar, Self, TypeGuard, cast +from typing import ClassVar, Self, TypeGuard import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import HasEndianness, HasItemSize +from zarr.core.dtype.common import ( + DataTypeValidationError, + HasEndianness, + HasItemSize, + ScalarTypeValidationError, +) from zarr.core.dtype.npy.common import ( - EndiannessNumpy, FloatLike, TFloatDType_co, TFloatScalar_co, check_json_float_v2, check_json_float_v3, - endianness_from_numpy_str, endianness_to_numpy_str, float_from_json_v2, float_from_json_v3, float_to_json_v2, float_to_json_v3, + get_endianness_from_numpy_dtype, ) from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType @@ -28,9 +32,12 @@ class BaseFloat(ZDType[TFloatDType_co, TFloatScalar_co], HasEndianness, HasItemS _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - byte_order = cast("EndiannessNumpy", 
dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls(endianness=get_endianness_from_numpy_dtype(dtype)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> TFloatDType_co: byte_order = endianness_to_numpy_str(self.endianness) @@ -77,11 +84,33 @@ def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Ty def _check_json_v3(cls, data: JSON) -> TypeGuard[str]: return data == cls._zarr_v3_name + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + # Going via numpy ensures that we get the endianness correct without + # annoying string parsing. + return cls.from_native_dtype(np.dtype(data)) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls() + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." + raise DataTypeValidationError(msg) + def _check_scalar(self, data: object) -> TypeGuard[FloatLike]: return isinstance(data, FloatLike) - def _cast_scalar_unchecked(self, data: object) -> TFloatScalar_co: - return self.to_native_dtype().type(data) # type: ignore[return-value, arg-type] + def cast_scalar(self, data: object) -> TFloatScalar_co: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy float scalar." 
+ raise ScalarTypeValidationError(msg) + + def _cast_scalar_unchecked(self, data: FloatLike) -> TFloatScalar_co: + return self.to_native_dtype().type(data) # type: ignore[return-value] def default_scalar(self) -> TFloatScalar_co: """ @@ -145,9 +174,9 @@ def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> float | st See the zarr specifications for details on the JSON encoding for floats. """ if zarr_format == 2: - return float_to_json_v2(self._cast_scalar_unchecked(data)) + return float_to_json_v2(self.cast_scalar(data)) elif zarr_format == 3: - return float_to_json_v3(self._cast_scalar_unchecked(data)) + return float_to_json_v3(self.cast_scalar(data)) else: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index aed577ee44..804e9e359a 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -7,21 +7,19 @@ SupportsInt, TypeGuard, TypeVar, - cast, overload, ) import numpy as np from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import HasEndianness, HasItemSize +from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasItemSize from zarr.core.dtype.npy.common import ( - EndiannessNumpy, check_json_int, - endianness_from_numpy_str, endianness_to_numpy_str, + get_endianness_from_numpy_dtype, ) -from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType +from zarr.core.dtype.wrapper import TBaseDType, ZDType _NumpyIntDType = ( np.dtypes.Int8DType @@ -43,7 +41,7 @@ @dataclass(frozen=True) class BaseInt(ZDType[TIntDType_co, TIntScalar_co], HasItemSize): - # This attribute holds the possible zarr v2 JSON names for the data type + # This attribute holds the possible zarr V2 JSON names for the data type _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod @@ -55,13 +53,28 @@ def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> 
Ty @classmethod def _check_json_v3(cls, data: JSON) -> TypeGuard[str]: + """ + Check that a JSON value is consistent with the zarr v3 spec for this data type. + """ return data == cls._zarr_v3_name def _check_scalar(self, data: object) -> TypeGuard[IntLike]: + """ + Check that a python object is IntLike + """ return isinstance(data, IntLike) - def _cast_scalar_unchecked(self, data: object) -> TIntScalar_co: - return self.to_native_dtype().type(data) # type: ignore[return-value, arg-type] + def _cast_scalar_unchecked(self, data: IntLike) -> TIntScalar_co: + """ + Create an integer without any type checking of the input. + """ + return self.to_native_dtype().type(data) # type: ignore[return-value] + + def cast_scalar(self, data: object) -> TIntScalar_co: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy integer." + raise TypeError(msg) def default_scalar(self) -> TIntScalar_co: """ @@ -117,7 +130,18 @@ def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> int: class Int8(BaseInt[np.dtypes.Int8DType, np.int8]): dtype_cls = np.dtypes.Int8DType _zarr_v3_name: ClassVar[Literal["int8"]] = "int8" - _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|i1",) + _zarr_v2_names: ClassVar[tuple[Literal["|i1"]]] = ("|i1",) + + @classmethod + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + """ + Create a Int8 from a np.dtype('int8') instance. + """ + if cls._check_native_dtype(dtype): + return cls() + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" + ) @overload def to_json(self, zarr_format: Literal[2]) -> Literal["|i1"]: ... 
@@ -140,23 +164,27 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["int8", "|i1"]: The JSON-serializable representation of the wrapped data type """ if zarr_format == 2: - return self.to_native_dtype().str + return self._zarr_v2_names[0] elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - return cls() - def to_native_dtype(self: Self) -> np.dtypes.Int8DType: return self.dtype_cls() @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - return cls() + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + return cls() + msg = f"Invalid JSON representation of Int8. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls() + msg = f"Invalid JSON representation of Int8. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) @property def item_size(self) -> int: @@ -167,7 +195,18 @@ def item_size(self) -> int: class UInt8(BaseInt[np.dtypes.UInt8DType, np.uint8]): dtype_cls = np.dtypes.UInt8DType _zarr_v3_name: ClassVar[Literal["uint8"]] = "uint8" - _zarr_v2_names: ClassVar[tuple[str, ...]] = ("|u1",) + _zarr_v2_names: ClassVar[tuple[Literal["|u1"]]] = ("|u1",) + + @classmethod + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + """ + Create a Bool from a np.dtype('uint8') instance. + """ + if cls._check_native_dtype(dtype): + return cls() + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" + ) @overload def to_json(self, zarr_format: Literal[2]) -> Literal["|u1"]: ... 
@@ -190,23 +229,27 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["uint8", "|u1"]: The JSON-serializable representation of the wrapped data type """ if zarr_format == 2: - return self.to_native_dtype().str + return self._zarr_v2_names[0] elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - return cls() - def to_native_dtype(self: Self) -> np.dtypes.UInt8DType: return self.dtype_cls() @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - return cls() + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + return cls() + msg = f"Invalid JSON representation of UInt8. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls() + msg = f"Invalid JSON representation of UInt8. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) @property def item_size(self) -> int: @@ -217,7 +260,7 @@ def item_size(self) -> int: class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): dtype_cls = np.dtypes.Int16DType _zarr_v3_name: ClassVar[Literal["int16"]] = "int16" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i2", "i2"], Literal["i2", " Literal[">i2", " Literal["int16", ">i2", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls(endianness=get_endianness_from_numpy_dtype(dtype)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> np.dtypes.Int16DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - # This ensures that we get the endianness correct without annoying string parsing - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + # Going via numpy ensures that we get the endianness correct without + # annoying string parsing. + return cls.from_native_dtype(np.dtype(data)) + msg = f"Invalid JSON representation of Int16. Got {data!r}, expected one of the strings {cls._zarr_v2_names!r}." + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + msg = f"Invalid JSON representation of Int16. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) @property def item_size(self) -> int: @@ -274,7 +325,7 @@ def item_size(self) -> int: class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): dtype_cls = np.dtypes.UInt16DType _zarr_v3_name: ClassVar[Literal["uint16"]] = "uint16" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u2", "u2"], Literal["u2", " Literal[">u2", " Literal["uint16", ">u2", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls(endianness=get_endianness_from_numpy_dtype(dtype)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> np.dtypes.UInt16DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + # Going via numpy ensures that we get the endianness correct without + # annoying string parsing. + return cls.from_native_dtype(np.dtype(data)) + msg = f"Invalid JSON representation of UInt16. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + msg = f"Invalid JSON representation of UInt16. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) @property def item_size(self) -> int: @@ -330,7 +390,7 @@ def item_size(self) -> int: class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): dtype_cls = np.dtypes.Int32DType _zarr_v3_name: ClassVar[Literal["int32"]] = "int32" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i4", "i4"], Literal["i4", " Literal[">i4", " Literal["int32", ">i4", " Self: - # We override the base implementation to address a windows-specific, pre-numpy 2 issue where - # ``np.dtype('i')`` is an instance of ``np.dtypes.IntDType`` that acts like `int32` instead of ``np.dtype('int32')`` - # In this case, ``type(np.dtype('i')) == np.dtypes.Int32DType`` will evaluate to ``True``, - # despite the two classes being different. Thus we will create an instance of `cls` with the - # latter dtype, after pulling in the byte order of the input - if dtype == np.dtypes.Int32DType(): - return cls._from_native_dtype_unchecked( - np.dtypes.Int32DType().newbyteorder(dtype.byteorder) - ) - else: - return super().from_native_dtype(dtype) - - @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) + if cls._check_native_dtype(dtype): + return cls(endianness=get_endianness_from_numpy_dtype(dtype)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> np.dtypes.Int32DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + # Going via numpy ensures that we get the endianness correct without + # annoying string parsing. + return cls.from_native_dtype(np.dtype(data)) + msg = f"Invalid JSON representation of Int32. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + msg = f"Invalid JSON representation of Int32. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) @property def item_size(self) -> int: @@ -400,7 +455,7 @@ def item_size(self) -> int: class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): dtype_cls = np.dtypes.UInt32DType _zarr_v3_name: ClassVar[Literal["uint32"]] = "uint32" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u4", "u4"], Literal["u4", " Literal[">u4", " Literal["uint32", ">u4", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls(endianness=get_endianness_from_numpy_dtype(dtype)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> np.dtypes.UInt32DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + # Going via numpy ensures that we get the endianness correct without + # annoying string parsing. + return cls.from_native_dtype(np.dtype(data)) + msg = f"Invalid JSON representation of UInt32. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + msg = f"Invalid JSON representation of UInt32. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) @property def item_size(self) -> int: @@ -454,7 +518,7 @@ def item_size(self) -> int: class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): dtype_cls = np.dtypes.Int64DType _zarr_v3_name: ClassVar[Literal["int64"]] = "int64" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">i8", "i8"], Literal["i8", " Literal[">i8", " Literal["int64", ">i8", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls(endianness=get_endianness_from_numpy_dtype(dtype)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> np.dtypes.Int64DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + # Going via numpy ensures that we get the endianness correct without + # annoying string parsing. + return cls.from_native_dtype(np.dtype(data)) + msg = f"Invalid JSON representation of Int64. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + msg = f"Invalid JSON representation of Int64. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) @property def item_size(self) -> int: @@ -508,7 +581,7 @@ def item_size(self) -> int: class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): dtype_cls = np.dtypes.UInt64DType _zarr_v3_name: ClassVar[Literal["uint64"]] = "uint64" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">u8", "u8"], Literal["u8", " Literal[">u8", " Literal["uint64", ">u8", " Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls(endianness=endianness_from_numpy_str(byte_order)) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + return cls(endianness=get_endianness_from_numpy_dtype(dtype)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" + ) def to_native_dtype(self) -> np.dtypes.UInt64DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + # Going via numpy ensures that we get the endianness correct without + # annoying string parsing. + return cls.from_native_dtype(np.dtype(data)) + msg = f"Invalid JSON representation of UInt64. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + msg = f"Invalid JSON representation of UInt64. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) @property def item_size(self) -> int: diff --git a/src/zarr/core/dtype/npy/string.py b/src/zarr/core/dtype/npy/string.py index 377c364ca2..f811dce00a 100644 --- a/src/zarr/core/dtype/npy/string.py +++ b/src/zarr/core/dtype/npy/string.py @@ -2,12 +2,23 @@ import re from dataclasses import dataclass -from typing import TYPE_CHECKING, ClassVar, Literal, Self, TypedDict, TypeGuard, cast, overload +from typing import ( + TYPE_CHECKING, + ClassVar, + Literal, + Protocol, + Self, + TypedDict, + TypeGuard, + overload, + runtime_checkable, +) import numpy as np from zarr.core.common import NamedConfig from zarr.core.dtype.common import ( + DataTypeValidationError, HasEndianness, HasItemSize, HasLength, @@ -15,12 +26,11 @@ v3_unstable_dtype_warning, ) from zarr.core.dtype.npy.common import ( - EndiannessNumpy, check_json_str, - endianness_from_numpy_str, endianness_to_numpy_str, + get_endianness_from_numpy_dtype, ) -from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, ZDType +from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TDType_co, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @@ -29,6 +39,11 @@ _NUMPY_SUPPORTS_VLEN_STRING = hasattr(np.dtypes, "StringDType") +@runtime_checkable +class SupportsStr(Protocol): + def __str__(self) -> str: ... 
+ + class LengthBytesConfig(TypedDict): length_bytes: int @@ -46,11 +61,15 @@ class FixedLengthUTF32( code_point_bytes: ClassVar[int] = 4 # utf32 is 4 bytes per code point @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - byte_order = cast("EndiannessNumpy", dtype.byteorder) - return cls( - length=dtype.itemsize // (cls.code_point_bytes), - endianness=endianness_from_numpy_str(byte_order), + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + endianness = get_endianness_from_numpy_dtype(dtype) + return cls( + length=dtype.itemsize // (cls.code_point_bytes), + endianness=endianness, + ) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) def to_native_dtype(self) -> np.dtypes.StrDType[int]: @@ -58,9 +77,9 @@ def to_native_dtype(self) -> np.dtypes.StrDType[int]: return self.dtype_cls(self.length).newbyteorder(byte_order) @classmethod - def _check_json_v2(cls, data: JSON, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ - Check that the input is a valid JSON representation of a numpy S dtype. + Check that the input is a valid JSON representation of a numpy U dtype. 
""" return isinstance(data, str) and re.match(r"^[><]U\d+$", data) is not None @@ -94,14 +113,20 @@ def to_json(self, zarr_format: ZarrFormat) -> str | FixedLengthUTF32JSONV3: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) # type: ignore[index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + # Construct the numpy dtype instead of string parsing. + return cls.from_native_dtype(np.dtype(data)) + raise DataTypeValidationError( + f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string representation of a numpy U dtype." + ) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." 
+ raise DataTypeValidationError(msg) def default_scalar(self) -> np.str_: return np.str_("") @@ -118,18 +143,22 @@ def _check_scalar(self, data: object) -> TypeGuard[str | np.str_ | bytes | int]: # this is generous for backwards compatibility return isinstance(data, str | np.str_ | bytes | int) - def _cast_scalar_unchecked(self, data: object) -> np.str_: - # We explicitly truncate the result because of the following numpy behavior: - # >>> x = np.dtype('U3').type('hello world') - # >>> x - # np.str_('hello world') - # >>> x.dtype - # dtype('U11') - - if isinstance(data, int): - return self.to_native_dtype().type(str(data)[: self.length]) - else: - return self.to_native_dtype().type(data[: self.length]) # type: ignore[index] + def cast_scalar(self, data: object) -> np.str_: + if self._check_scalar(data): + # We explicitly truncate before casting because of the following numpy behavior: + # >>> x = np.dtype('U3').type('hello world') + # >>> x + # np.str_('hello world') + # >>> x.dtype + # dtype('U11') + + if isinstance(data, int): + return self.to_native_dtype().type(str(data)[: self.length]) + else: + return self.to_native_dtype().type(data[: self.length]) + raise TypeError( + f"Cannot convert object with type {type(data)} to a numpy unicode string scalar." + ) @property def item_size(self) -> int: @@ -145,144 +174,119 @@ def check_vlen_string_json_scalar(data: object) -> TypeGuard[int | str | float]: return isinstance(data, int | str | float) -if _NUMPY_SUPPORTS_VLEN_STRING: +# VariableLengthUTF8 is defined in two places, conditioned on the version of numpy. +# If numpy 2 is installed, then VariableLengthUTF8 is defined with the numpy variable length +# string dtype as the native dtype. Otherwise, VariableLengthUTF8 is defined with the numpy object +# dtype as the native dtype. +class UTF8Base(ZDType[TDType_co, str], HasObjectCodec): + """ + A base class for the variable length UTF-8 string data type. 
This class should not be used + as data type, but as a base class for other variable length string data types. + """ - @dataclass(frozen=True, kw_only=True) - class VariableLengthUTF8(ZDType[np.dtypes.StringDType, str], HasObjectCodec): # type: ignore[type-var] - dtype_cls = np.dtypes.StringDType - _zarr_v3_name: ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" - object_codec_id = "vlen-utf8" + _zarr_v3_name: ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" + object_codec_id: ClassVar[Literal["vlen-utf8"]] = "vlen-utf8" - @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: + @classmethod + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): return cls() + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" + ) - def to_native_dtype(self) -> np.dtypes.StringDType: - return self.dtype_cls() + @classmethod + def _check_json_v2( + cls, data: JSON, *, object_codec_id: str | None = None + ) -> TypeGuard[Literal["|O"]]: + """ + Check that the input is a valid JSON representation of a numpy O dtype, and that the + object codec id is appropriate for variable-length UTF-8 strings. + """ + return data == "|O" and object_codec_id == cls.object_codec_id + + @classmethod + def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: + return data == cls._zarr_v3_name + + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + return cls() + msg = ( + f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}, expected the string '|O'" + ) + raise DataTypeValidationError(msg) - @classmethod - def _check_json_v2( - cls, data: JSON, *, object_codec_id: str | None = None - ) -> TypeGuard[Literal["|O"]]: - """ - Check that the input is a valid JSON representation of a numpy O dtype, and that the - object codec id is appropriate for variable-length UTF-8 strings. - """ - return data == "|O" and object_codec_id == cls.object_codec_id - - @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: - return data == cls._zarr_v3_name - - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... - @overload - def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_utf8"]: ... - - def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf8"]: - if zarr_format == 2: - # Note: unlike many other numpy data types, we don't serialize the .str attribute - # of the data type to JSON. This is because Zarr was using `|O` for strings before the - # numpy variable length string data type existed, and we want to be consistent with - # that practice - return "|O" - elif zarr_format == 3: - v3_unstable_dtype_warning(self) - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): return cls() + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." + raise DataTypeValidationError(msg) - def default_scalar(self) -> str: - return "" + @overload + def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... + @overload + def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_utf8"]: ... 
- def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - if self._check_scalar(data): - return data - raise TypeError(f"Invalid type: {data}. Expected a string.") + def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf8"]: + if zarr_format == 2: + # Note: unlike many other numpy data types, we don't serialize the .str attribute + # of the data type to JSON. This is because Zarr was using `|O` for strings before the + # numpy variable length string data type existed, and we want to be consistent with + # that practice + return "|O" + elif zarr_format == 3: + v3_unstable_dtype_warning(self) + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: - if not check_vlen_string_json_scalar(data): - raise TypeError(f"Invalid type: {data}. Expected a string or number.") - return str(data) + @classmethod + def _from_json_unchecked( + cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat + ) -> Self: + return cls() - def _check_scalar(self, data: object) -> TypeGuard[str]: - return isinstance(data, str) + def default_scalar(self) -> str: + return "" - def _cast_scalar_unchecked(self, data: object) -> str: - return str(data) + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + raise TypeError(f"Invalid type: {data}. Expected a string.") -else: - # Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead. 
- @dataclass(frozen=True, kw_only=True) - class VariableLengthUTF8(ZDType[np.dtypes.ObjectDType, str], HasObjectCodec): # type: ignore[no-redef] - dtype_cls = np.dtypes.ObjectDType - _zarr_v3_name: ClassVar[Literal["variable_length_utf8"]] = "variable_length_utf8" - object_codec_id = "vlen-utf8" + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: + if not check_vlen_string_json_scalar(data): + raise TypeError(f"Invalid type: {data}. Expected a string or number.") + return str(data) - @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - return cls() + def _check_scalar(self, data: object) -> TypeGuard[SupportsStr]: + return isinstance(data, SupportsStr) - def to_native_dtype(self) -> np.dtypes.ObjectDType: - return self.dtype_cls() + def _cast_scalar_unchecked(self, data: SupportsStr) -> str: + return str(data) - @classmethod - def _check_json_v2( - cls, data: JSON, *, object_codec_id: str | None = None - ) -> TypeGuard[Literal["|O"]]: - """ - Check that the input is a valid JSON representation of a numpy O dtype, and that the - object codec id is appropriate for variable-length UTF-8 strings. - """ - return data == "|O" and object_codec_id == cls.object_codec_id - - @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: - return data == cls._zarr_v3_name - - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... - - @overload - def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_utf8"]: ... 
- - def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf8"]: - if zarr_format == 2: - return "|O" - elif zarr_format == 3: - v3_unstable_dtype_warning(self) - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - return cls() + def cast_scalar(self, data: object) -> str: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + raise TypeError(f"Cannot convert object with type {type(data)} to a python string.") - def default_scalar(self) -> str: - return "" - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - if self._check_scalar(data): - return data - raise TypeError(f"Invalid type: {data}. Expected a string.") +if _NUMPY_SUPPORTS_VLEN_STRING: + + @dataclass(frozen=True, kw_only=True) + class VariableLengthUTF8(UTF8Base[np.dtypes.StringDType]): # type: ignore[type-var] + dtype_cls = np.dtypes.StringDType - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> str: - """ - Strings pass through - """ - if not check_json_str(data): - raise TypeError(f"Invalid type: {data}. Expected a string.") - return data + def to_native_dtype(self) -> np.dtypes.StringDType: + return self.dtype_cls() - def _check_scalar(self, data: object) -> TypeGuard[str]: - return isinstance(data, str) +else: + # Numpy pre-2 does not have a variable length string dtype, so we use the Object dtype instead. 
+ @dataclass(frozen=True, kw_only=True) + class VariableLengthUTF8(UTF8Base[np.dtypes.ObjectDType]): # type: ignore[no-redef] + dtype_cls = np.dtypes.ObjectDType - def _cast_scalar_unchecked(self, data: object) -> str: - return str(data) + def to_native_dtype(self) -> np.dtypes.ObjectDType: + return self.dtype_cls() diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index 579e0a9e27..b6196b7fed 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -17,6 +17,8 @@ ) from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, TBaseScalar, ZDType +StructuredScalarLike = list[object] | tuple[object, ...] | bytes | int + # TODO: tighten this up, get a v3 spec in place, handle endianness, etc. @dataclass(frozen=True, kw_only=True) @@ -25,19 +27,6 @@ class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): _zarr_v3_name = "structured" fields: tuple[tuple[str, ZDType[TBaseDType, TBaseScalar]], ...] - def default_scalar(self) -> np.void: - return self._cast_scalar_unchecked(0) - - def _cast_scalar_unchecked(self, data: object) -> np.void: - na_dtype = self.to_native_dtype() - if isinstance(data, bytes): - res = np.frombuffer(data, dtype=na_dtype)[0] - elif isinstance(data, list | tuple): - res = np.array([tuple(data)], dtype=na_dtype)[0] - else: - res = np.array([data], dtype=na_dtype)[0] - return cast("np.void", res) - @classmethod def _check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType[int]]: """ @@ -53,24 +42,30 @@ def _check_native_dtype(cls, dtype: TBaseDType) -> TypeGuard[np.dtypes.VoidDType TypeGuard[np.dtypes.VoidDType] True if the dtype matches, False otherwise. 
""" - return super()._check_native_dtype(dtype) and dtype.fields is not None + return isinstance(dtype, cls.dtype_cls) and dtype.fields is not None # type: ignore[has-type] @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: + def from_native_dtype(cls, dtype: TBaseDType) -> Self: from zarr.core.dtype import get_data_type_from_native_dtype fields: list[tuple[str, ZDType[TBaseDType, TBaseScalar]]] = [] + if cls._check_native_dtype(dtype): + # fields of a structured numpy dtype are either 2-tuples or 3-tuples. we only + # care about the first element in either case. + for key, (dtype_instance, *_) in dtype.fields.items(): # type: ignore[union-attr] + dtype_wrapped = get_data_type_from_native_dtype(dtype_instance) + fields.append((key, dtype_wrapped)) + + return cls(fields=tuple(fields)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" # type: ignore[has-type] + ) - if dtype.fields is None: - raise ValueError("numpy dtype has no fields") - - # fields of a structured numpy dtype are either 2-tuples or 3-tuples. we only - # care about the first element in either case. - for key, (dtype_instance, *_) in dtype.fields.items(): - dtype_wrapped = get_data_type_from_native_dtype(dtype_instance) - fields.append((key, dtype_wrapped)) - - return cls(fields=tuple(fields)) + def to_native_dtype(self) -> np.dtypes.VoidDType[int]: + return cast( + "np.dtypes.VoidDType[int]", + np.dtype([(key, dtype.to_native_dtype()) for (key, dtype) in self.fields]), + ) @overload def to_json(self, zarr_format: Literal[2]) -> DTypeJSON_V2: ... 
@@ -113,67 +108,78 @@ def _check_json_v3( ) -> TypeGuard[NamedConfig[Literal["structured"], dict[str, Sequence[tuple[str, JSON]]]]]: return ( isinstance(data, dict) - and "name" in data + and set(data.keys()) == {"name", "configuration"} and data["name"] == cls._zarr_v3_name - and "configuration" in data and isinstance(data["configuration"], dict) - and "fields" in data["configuration"] + and set(data["configuration"].keys()) == {"fields"} ) @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - # avoid circular import issues by importing these functions here - from zarr.core.dtype import get_data_type_from_json_v2, get_data_type_from_json_v3 - - # This is a horrible mess, because this data type is recursive - if zarr_format == 2: - if cls._check_json_v2(data): # type: ignore[arg-type] - # structured dtypes are constructed directly from a list of lists - # note that we do not handle the object codec here! this will prevent structured - # dtypes from containing object dtypes. - return cls( - fields=tuple( # type: ignore[misc] - (f_name, get_data_type_from_json_v2(f_dtype, object_codec_id=None)) # type: ignore[has-type] - for f_name, f_dtype in data - ) + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + # avoid circular import + from zarr.core.dtype import get_data_type_from_json_v2 + + if cls._check_json_v2(data): + # structured dtypes are constructed directly from a list of lists + # note that we do not handle the object codec here! this will prevent structured + # dtypes from containing object dtypes. 
+ return cls( + fields=tuple( # type: ignore[misc] + (f_name, get_data_type_from_json_v2(f_dtype, object_codec_id=None)) # type: ignore[has-type] + for f_name, f_dtype in data ) - else: - raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") - elif zarr_format == 3: - if cls._check_json_v3(data): # type: ignore[arg-type] - config = data["configuration"] - meta_fields = config["fields"] - fields = tuple( + ) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON array of arrays" + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + # avoid circular import + from zarr.core.dtype import get_data_type_from_json_v3 + + if cls._check_json_v3(data): + config = data["configuration"] + meta_fields = config["fields"] + return cls( + fields=tuple( (f_name, get_data_type_from_json_v3(f_dtype)) for f_name, f_dtype in meta_fields ) - else: - raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}.") - else: - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - return cls(fields=fields) - - def to_native_dtype(self) -> np.dtypes.VoidDType[int]: - return cast( - "np.dtypes.VoidDType[int]", - np.dtype([(key, dtype.to_native_dtype()) for (key, dtype) in self.fields]), - ) + ) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON object with the key {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: return bytes_to_json(self.cast_scalar(data).tobytes(), zarr_format) - def _check_scalar(self, data: object) -> bool: + def _check_scalar(self, data: object) -> TypeGuard[StructuredScalarLike]: # TODO: implement something here! 
return True + def default_scalar(self) -> np.void: + return self._cast_scalar_unchecked(0) + + def cast_scalar(self, data: object) -> np.void: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy structured scalar." + raise TypeError(msg) + + def _cast_scalar_unchecked(self, data: StructuredScalarLike) -> np.void: + na_dtype = self.to_native_dtype() + if isinstance(data, bytes): + res = np.frombuffer(data, dtype=na_dtype)[0] + elif isinstance(data, list | tuple): + res = np.array([tuple(data)], dtype=na_dtype)[0] + else: + res = np.array([data], dtype=na_dtype)[0] + return cast("np.void", res) + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: if check_json_str(data): as_bytes = bytes_from_json(data, zarr_format=zarr_format) dtype = self.to_native_dtype() return cast("np.void", np.array([as_bytes]).view(dtype)[0]) - raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover + raise TypeError(f"Invalid type: {data}. 
Expected a string.") @property def item_size(self) -> int: diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index 9f82d3d168..dd4f3840b1 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -1,6 +1,7 @@ from __future__ import annotations from dataclasses import dataclass +from datetime import datetime, timedelta from typing import ( TYPE_CHECKING, ClassVar, @@ -17,20 +18,21 @@ import numpy as np from zarr.core.common import NamedConfig -from zarr.core.dtype.common import HasEndianness, HasItemSize +from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasItemSize from zarr.core.dtype.npy.common import ( DateTimeUnit, - EndiannessNumpy, check_json_int, - endianness_from_numpy_str, endianness_to_numpy_str, + get_endianness_from_numpy_dtype, ) -from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType +from zarr.core.dtype.wrapper import TBaseDType, ZDType if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat _DTypeName = Literal["datetime64", "timedelta64"] +TimeDeltaLike = str | int | bytes | np.timedelta64 | timedelta | None +DateTimeLike = str | int | bytes | np.datetime64 | datetime | None def datetime_from_int(data: int, *, unit: DateTimeUnit, scale_factor: int) -> np.datetime64: @@ -72,17 +74,27 @@ def datetimelike_to_int(data: np.datetime64 | np.timedelta64) -> int: return data.view(np.int64).item() -_BaseTimeDType_co = TypeVar( - "_BaseTimeDType_co", +def check_json_time(data: JSON) -> TypeGuard[Literal["NaT"] | int]: + """ + Type guard to check if the input JSON data is the literal string "NaT" + or an integer. 
+ """ + return check_json_int(data) or data == "NaT" + + +BaseTimeDType_co = TypeVar( + "BaseTimeDType_co", bound=np.dtypes.TimeDelta64DType | np.dtypes.DateTime64DType, covariant=True, ) -_BaseTimeScalar = TypeVar("_BaseTimeScalar", bound=np.timedelta64 | np.datetime64) +BaseTimeScalar_co = TypeVar( + "BaseTimeScalar_co", bound=np.timedelta64 | np.datetime64, covariant=True +) class TimeConfig(TypedDict): unit: DateTimeUnit - interval: int + scale_factor: int DateTime64JSONV3 = NamedConfig[Literal["numpy.datetime64"], TimeConfig] @@ -90,7 +102,7 @@ class TimeConfig(TypedDict): @dataclass(frozen=True, kw_only=True, slots=True) -class TimeDTypeBase(ZDType[_BaseTimeDType_co, _BaseTimeScalar], HasEndianness, HasItemSize): +class TimeDTypeBase(ZDType[BaseTimeDType_co, BaseTimeScalar_co], HasEndianness, HasItemSize): _zarr_v2_names: ClassVar[tuple[str, ...]] # this attribute exists so that we can programmatically create a numpy dtype instance # because the particular numpy dtype we are wrapping does not allow direct construction via @@ -108,33 +120,26 @@ def __post_init__(self) -> None: raise ValueError(f"unit must be one of {get_args(DateTimeUnit)}, got {self.unit!r}.") @classmethod - def _from_native_dtype_unchecked(cls, dtype: TBaseDType) -> Self: - unit, scale_factor = np.datetime_data(dtype.name) - unit = cast("DateTimeUnit", unit) - byteorder = cast("EndiannessNumpy", dtype.byteorder) - return cls( - unit=unit, scale_factor=scale_factor, endianness=endianness_from_numpy_str(byteorder) + def from_native_dtype(cls, dtype: TBaseDType) -> Self: + if cls._check_native_dtype(dtype): + unit, scale_factor = np.datetime_data(dtype.name) + unit = cast("DateTimeUnit", unit) + return cls( + unit=unit, + scale_factor=scale_factor, + endianness=get_endianness_from_numpy_dtype(dtype), + ) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. 
Expected an instance of {cls.dtype_cls}" ) - def to_native_dtype(self) -> _BaseTimeDType_co: + def to_native_dtype(self) -> BaseTimeDType_co: # Numpy does not allow creating datetime64 or timedelta64 via # np.dtypes.{dtype_name}() # so we use np.dtype with a formatted string. dtype_string = f"{self._numpy_name}[{self.scale_factor}{self.unit}]" return np.dtype(dtype_string).newbyteorder(endianness_to_numpy_str(self.endianness)) # type: ignore[return-value] - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - unit = data["configuration"]["unit"] # type: ignore[index, call-overload] - scale_factor = data["configuration"]["scale_factor"] # type: ignore[index, call-overload] - return cls(unit=unit, scale_factor=scale_factor) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @overload def to_json(self, zarr_format: Literal[2]) -> str: ... @overload @@ -156,14 +161,6 @@ def to_json(self, zarr_format: ZarrFormat) -> str | DateTime64JSONV3 | TimeDelta def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> int: return datetimelike_to_int(data) # type: ignore[arg-type] - def _check_scalar(self, data: object) -> bool: - # TODO: decide which values we should accept for datetimes. - try: - np.array([data], dtype=self.to_native_dtype()) - return True # noqa: TRY300 - except ValueError: - return False - @property def item_size(self) -> int: return 8 @@ -178,6 +175,8 @@ class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], Has unit for ``TimeDelta64`` is optional. 
""" + # mypy infers the type of np.dtypes.TimeDelta64DType to be + # "Callable[[Literal['Y', 'M', 'W', 'D'] | Literal['h', 'm', 's', 'ms', 'us', 'ns', 'ps', 'fs', 'as']], Never]" dtype_cls = np.dtypes.TimeDelta64DType # type: ignore[assignment] _zarr_v3_name: ClassVar[Literal["numpy.timedelta64"]] = "numpy.timedelta64" _zarr_v2_names = (">m8", " np.timedelta64: return np.timedelta64("NaT") def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: - if check_json_int(data) or data == "NaT": - return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] + if check_json_time(data): + return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover - def _cast_scalar_unchecked(self, data: object) -> np.timedelta64: - return self.to_native_dtype().type(data) # type: ignore[arg-type] + def _check_scalar(self, data: object) -> TypeGuard[TimeDeltaLike]: + if data is None: + return True + return isinstance(data, str | int | bytes | np.timedelta64 | timedelta) + + def _cast_scalar_unchecked(self, data: TimeDeltaLike) -> np.timedelta64: + return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") + + def cast_scalar(self, data: object) -> np.timedelta64: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy timedelta64 scalar." 
+ raise TypeError(msg) @classmethod def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: @@ -221,6 +231,30 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: and set(data["configuration"].keys()) == {"unit", "scale_factor"} ) + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data): + return cls.from_native_dtype(np.dtype(data)) + msg = ( + f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string " + f"representation of an instance of {cls.dtype_cls}" # type: ignore[has-type] + ) + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + unit = data["configuration"]["unit"] + scale_factor = data["configuration"]["scale_factor"] + return cls(unit=unit, scale_factor=scale_factor) + msg = ( + f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a dict " + f"with a 'name' key with the value 'numpy.timedelta64', " + "and a 'configuration' key with a value of a dict with a 'unit' key and a " + "'scale_factor' key" + ) + raise DataTypeValidationError(msg) + @dataclass(frozen=True, kw_only=True, slots=True) class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): @@ -235,12 +269,23 @@ def default_scalar(self) -> np.datetime64: return np.datetime64("NaT") def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: - if check_json_int(data) or data == "NaT": - return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[arg-type] + if check_json_time(data): + return self._cast_scalar_unchecked(data) raise TypeError(f"Invalid type: {data}. 
Expected an integer.") # pragma: no cover - def _cast_scalar_unchecked(self, data: object) -> np.datetime64: - return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") # type: ignore[no-any-return, call-overload] + def _check_scalar(self, data: object) -> TypeGuard[DateTimeLike]: + if data is None: + return True + return isinstance(data, str | int | bytes | np.datetime64 | datetime) + + def _cast_scalar_unchecked(self, data: DateTimeLike) -> np.datetime64: + return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") + + def cast_scalar(self, data: object) -> np.datetime64: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy datetime scalar." + raise TypeError(msg) @classmethod def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: @@ -266,3 +311,27 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: and isinstance(data["configuration"], dict) and set(data["configuration"].keys()) == {"unit", "scale_factor"} ) + + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data): + return cls.from_native_dtype(np.dtype(data)) + msg = ( + f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string " + f"representation of an instance of {cls.dtype_cls}" # type: ignore[has-type] + ) + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + unit = data["configuration"]["unit"] + scale_factor = data["configuration"]["scale_factor"] + return cls(unit=unit, scale_factor=scale_factor) + msg = ( + f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}, expected a dict " + f"with a 'name' key with the value 'numpy.datetime64', " + "and a 'configuration' key with a value of a dict with a 'unit' key and a " + "'scale_factor' key" + ) + raise DataTypeValidationError(msg) diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 94fbe60242..557da87fcf 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -39,8 +39,6 @@ import numpy as np -from zarr.core.dtype.common import DataTypeValidationError - if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat @@ -101,12 +99,12 @@ def _check_native_dtype(cls: type[Self], dtype: TBaseDType) -> TypeGuard[TDType_ return type(dtype) is cls.dtype_cls @classmethod + @abstractmethod def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: """ Create a ZDType instance from a native data type. The default implementation first performs - a type check via ``cls._check_native_dtype``. If that type check succeeds, then - ``cls._from_native_dtype_unchecked`` is called, which assumes that the incoming object - as all the properties necessary for instantiating the ZDType. + a type check via ``cls._check_native_dtype``. If that type check succeeds, the ZDType class + instance is created. This method is used when taking a user-provided native data type, like a NumPy data type, and creating the corresponding ZDType instance from them. @@ -126,29 +124,6 @@ def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: TypeError If the native data type is not consistent with the wrapped data type. """ - if cls._check_native_dtype(dtype): - return cls._from_native_dtype_unchecked(dtype) - raise DataTypeValidationError( - f"Invalid dtype: {dtype}. Expected an instance of {cls.dtype_cls}." 
- ) - - @classmethod - @abstractmethod - def _from_native_dtype_unchecked(cls: type[Self], dtype: TBaseDType) -> Self: - """ - Create a ZDType instance from a native data type without performing any type checking of - that data type. - - Parameters - ---------- - dtype : TDType - The native dtype to wrap. - - Returns - ------- - Self - A ZDType that wraps the native dtype. - """ ... @abstractmethod @@ -163,6 +138,7 @@ def to_native_dtype(self: Self) -> TDType_co: """ ... + @abstractmethod def cast_scalar(self, data: object) -> TScalar_co: """ Cast a python object to the wrapped scalar type. @@ -179,15 +155,6 @@ def cast_scalar(self, data: object) -> TScalar_co: TScalar The cast value. """ - if self._check_scalar(data): - return self._cast_scalar_unchecked(data) - msg = ( - f"The value {data!r} failed a type check. " - f"It cannot be safely cast to a scalar compatible with {self}. " - f"Consult the documentation for {self} to determine the possible values that can " - "be cast to scalars of the wrapped data type." - ) - raise TypeError(msg) @abstractmethod def _check_scalar(self, data: object) -> bool: @@ -206,24 +173,6 @@ def _check_scalar(self, data: object) -> bool: """ ... - @abstractmethod - def _cast_scalar_unchecked(self, data: object) -> TScalar_co: - """ - Cast a python object to the wrapped data type. - This method should not perform any type checking. - - Parameters - ---------- - data : object - The python object to cast. - - Returns - ------- - TScalar - The cast value. - """ - ... - @abstractmethod def default_scalar(self) -> TScalar_co: """ @@ -309,6 +258,7 @@ def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V2 | DTypeJSON_V3: ... @classmethod + @abstractmethod def from_json_v3(cls: type[Self], data: JSON) -> Self: """ Create an instance of this ZDType from Zarr V3 JSON data. @@ -323,48 +273,13 @@ def from_json_v3(cls: type[Self], data: JSON) -> Self: Self The wrapped data type. 
""" - if cls._check_json_v3(data): - return cls._from_json_unchecked(data, zarr_format=3) - raise DataTypeValidationError(f"Invalid JSON representation of data type {cls}: {data}") - - @classmethod - def from_json_v2(cls: type[Self], data: JSON, *, object_codec_id: str | None) -> Self: - """ - Create an instance of this ZDType from Zarr V2 JSON data. - - Parameters - ---------- - data : JSON - The JSON representation of the data type. - - Returns - ------- - Self - The wrapped data type. - """ - if cls._check_json_v2(data, object_codec_id=object_codec_id): - return cls._from_json_unchecked(data, zarr_format=2) - raise DataTypeValidationError( - f"Invalid JSON representation of data type {cls}: {data!r}, object_codec_id={object_codec_id!r}" - ) - - @classmethod - @overload - def _from_json_unchecked(cls, data: DTypeJSON_V2, *, zarr_format: Literal[2]) -> Self: ... - @classmethod - @overload - def _from_json_unchecked(cls, data: DTypeJSON_V3, *, zarr_format: Literal[3]) -> Self: ... + ... @classmethod @abstractmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: + def from_json_v2(cls: type[Self], data: JSON, *, object_codec_id: str | None = None) -> Self: """ - Create a ZDType instance from a JSON representation of a data type. - - This method should be called after input has been type checked, and so it should not perform - any input validation. + Create an instance of this ZDType from Zarr V2 JSON data. Parameters ---------- @@ -418,3 +333,18 @@ def from_json_scalar(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TSca The deserialized scalar value. """ ... + + +def scalar_failed_type_check_msg( + cls_instance: ZDType[TBaseDType, TBaseScalar], bad_scalar: object +) -> str: + """ + Generate an error message reporting that a particular value failed a type check when attempting + to cast that value to a scalar. + """ + return ( + f"The value {bad_scalar!r} failed a type check. 
" + f"It cannot be safely cast to a scalar compatible with {cls_instance}. " + f"Consult the documentation for {cls_instance} to determine the possible values that can " + "be cast to scalars of the wrapped data type." + ) diff --git a/tests/test_array.py b/tests/test_array.py index 862b49da61..28ea812967 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -7,7 +7,7 @@ import re import sys from itertools import accumulate -from typing import TYPE_CHECKING, Any, Literal, get_args +from typing import TYPE_CHECKING, Any, Literal from unittest import mock import numcodecs @@ -42,8 +42,8 @@ from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.dtype import get_data_type_from_native_dtype -from zarr.core.dtype.common import Endianness -from zarr.core.dtype.npy.common import endianness_from_numpy_str +from zarr.core.dtype.common import ENDIANNESS_STR, EndiannessStr +from zarr.core.dtype.npy.common import NUMPY_ENDIANNESS_STR, endianness_from_numpy_str from zarr.core.dtype.npy.float import Float32, Float64 from zarr.core.dtype.npy.int import Int16, UInt8 from zarr.core.dtype.npy.string import VariableLengthUTF8 @@ -1507,16 +1507,18 @@ async def test_name(store: Store, zarr_format: ZarrFormat, path: str | None) -> ) @staticmethod - @pytest.mark.parametrize("endianness", get_args(Endianness)) + @pytest.mark.parametrize("endianness", ENDIANNESS_STR) def test_default_endianness( - store: Store, zarr_format: ZarrFormat, endianness: Endianness + store: Store, zarr_format: ZarrFormat, endianness: EndiannessStr ) -> None: """ Test that that endianness is correctly set when creating an array when not specifying a serializer """ dtype = Int16(endianness=endianness) arr = zarr.create_array(store=store, shape=(1,), dtype=dtype, zarr_format=zarr_format) - assert endianness_from_numpy_str(arr[:].dtype.byteorder) == endianness # type: ignore[union-attr] + byte_order: str = 
arr[:].dtype.byteorder # type: ignore[union-attr] + assert byte_order in NUMPY_ENDIANNESS_STR + assert endianness_from_numpy_str(byte_order) == endianness # type: ignore[arg-type] @pytest.mark.parametrize("value", [1, 1.4, "a", b"a", np.array(1)]) diff --git a/tests/test_dtype/test_npy/test_common.py b/tests/test_dtype/test_npy/test_common.py index c4a82e22b0..d39d308112 100644 --- a/tests/test_dtype/test_npy/test_common.py +++ b/tests/test_dtype/test_npy/test_common.py @@ -9,9 +9,9 @@ import numpy as np import pytest -from zarr.core.dtype.common import Endianness, JSONFloatV2, SpecialFloatStrings +from zarr.core.dtype.common import ENDIANNESS_STR, JSONFloatV2, SpecialFloatStrings from zarr.core.dtype.npy.common import ( - EndiannessNumpy, + NumpyEndiannessStr, bytes_from_json, bytes_to_json, check_json_bool, @@ -67,10 +67,10 @@ def test_endianness_from_numpy_str(data: str, expected: str | None) -> None: Test that endianness_from_numpy_str correctly converts a numpy str literal to a human-readable literal value. This test also checks that an invalid string input raises a ``ValueError`` """ - if data in get_args(EndiannessNumpy): + if data in get_args(NumpyEndiannessStr): assert endianness_from_numpy_str(data) == expected # type: ignore[arg-type] else: - msg = f"Invalid endianness: {data!r}. Expected one of {get_args(EndiannessNumpy)}" + msg = f"Invalid endianness: {data!r}. Expected one of {get_args(NumpyEndiannessStr)}" with pytest.raises(ValueError, match=re.escape(msg)): endianness_from_numpy_str(data) # type: ignore[arg-type] @@ -84,10 +84,10 @@ def test_endianness_to_numpy_str(data: str | None, expected: str) -> None: Test that endianness_to_numpy_str correctly converts a human-readable literal value to a numpy str literal. 
This test also checks that an invalid string input raises a ``ValueError`` """ - if data in get_args(Endianness) + (None,): + if data in ENDIANNESS_STR: assert endianness_to_numpy_str(data) == expected # type: ignore[arg-type] else: - msg = f"Invalid endianness: {data!r}. Expected one of {get_args(Endianness)}" + msg = f"Invalid endianness: {data!r}. Expected one of {ENDIANNESS_STR}" with pytest.raises(ValueError, match=re.escape(msg)): endianness_to_numpy_str(data) # type: ignore[arg-type] From b2e56c8e9d342163f9d28b92188994a2985aa3c7 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sat, 14 Jun 2025 15:16:02 +0300 Subject: [PATCH 127/130] make method definition order consistent --- src/zarr/core/dtype/npy/bool.py | 20 +- src/zarr/core/dtype/npy/bytes.py | 108 +++++------ src/zarr/core/dtype/npy/complex.py | 46 ++--- src/zarr/core/dtype/npy/float.py | 58 +++--- src/zarr/core/dtype/npy/int.py | 264 +++++++++++++------------- src/zarr/core/dtype/npy/structured.py | 62 +++--- src/zarr/core/dtype/npy/time.py | 121 ++++++------ src/zarr/core/dtype/wrapper.py | 146 +++++++------- 8 files changed, 404 insertions(+), 421 deletions(-) diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index 2d045ce28a..7570dd1f4f 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -82,6 +82,16 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["|b1", "bool"]: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def _check_scalar(self, data: object) -> bool: + # Anything can become a bool + return True + + def cast_scalar(self, data: object) -> np.bool_: + if self._check_scalar(data): + return np.bool_(data) + msg = f"Cannot convert object with type {type(data)} to a numpy boolean." + raise TypeError(msg) + def default_scalar(self) -> np.bool_: """ Get the default value for the boolean dtype. 
@@ -131,16 +141,6 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bool_: return np.bool_(data) raise TypeError(f"Invalid type: {data}. Expected a boolean.") # pragma: no cover - def _check_scalar(self, data: object) -> bool: - # Anything can become a bool - return True - - def cast_scalar(self, data: object) -> np.bool_: - if self._check_scalar(data): - return np.bool_(data) - msg = f"Cannot convert object with type {type(data)} to a numpy boolean." - raise TypeError(msg) - @property def item_size(self) -> int: return 1 diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py index d98114e9e1..9dc0bb1a68 100644 --- a/src/zarr/core/dtype/npy/bytes.py +++ b/src/zarr/core/dtype/npy/bytes.py @@ -14,7 +14,7 @@ v3_unstable_dtype_warning, ) from zarr.core.dtype.npy.common import check_json_str -from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType +from zarr.core.dtype.wrapper import TBaseDType, ZDType BytesLike = np.bytes_ | str | bytes | int @@ -92,30 +92,6 @@ def to_json(self, zarr_format: ZarrFormat) -> str | NullTerminatedBytesJSONV3: } raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls(length=data["configuration"]["length_bytes"]) # type: ignore[index, call-overload] - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - def default_scalar(self) -> np.bytes_: - return np.bytes_(b"") - - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - as_bytes = self.cast_scalar(data) - return base64.standard_b64encode(as_bytes).decode("ascii") - - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: - if 
check_json_str(data): - return self.to_native_dtype().type(base64.standard_b64decode(data.encode("ascii"))) - raise TypeError( - f"Invalid type: {data}. Expected a base64-encoded string." - ) # pragma: no cover - def _check_scalar(self, data: object) -> TypeGuard[BytesLike]: # this is generous for backwards compatibility return isinstance(data, BytesLike) @@ -139,6 +115,20 @@ def cast_scalar(self, data: object) -> np.bytes_: msg = f"Cannot convert object with type {type(data)} to a numpy bytes scalar." raise TypeError(msg) + def default_scalar(self) -> np.bytes_: + return np.bytes_(b"") + + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + as_bytes = self.cast_scalar(data) + return base64.standard_b64encode(as_bytes).decode("ascii") + + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: + if check_json_str(data): + return self.to_native_dtype().type(base64.standard_b64decode(data.encode("ascii"))) + raise TypeError( + f"Invalid type: {data}. Expected a base64-encoded string." + ) # pragma: no cover + @property def item_size(self) -> int: return self.length @@ -152,6 +142,29 @@ class RawBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] _zarr_v3_name: ClassVar[Literal["raw_bytes"]] = "raw_bytes" + @classmethod + def _check_native_dtype( + cls: type[Self], dtype: TBaseDType + ) -> TypeGuard[np.dtypes.VoidDType[Any]]: + """ + Numpy void dtype comes in two forms: + * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. + * If the ``fields`` attribute is not ``None``, then the dtype represents a structured dtype, + + In this check we ensure that ``fields`` is ``None``. + + Parameters + ---------- + dtype : TDType + The dtype to check. + + Returns + ------- + Bool + True if the dtype matches, False otherwise. 
+ """ + return cls.dtype_cls is type(dtype) and dtype.fields is None # type: ignore[has-type] + @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: if cls._check_native_dtype(dtype): @@ -208,40 +221,6 @@ def to_json(self, zarr_format: ZarrFormat) -> str | RawBytesJSONV3: return {"name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}} raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _check_native_dtype( - cls: type[Self], dtype: TBaseDType - ) -> TypeGuard[np.dtypes.VoidDType[Any]]: - """ - Numpy void dtype comes in two forms: - * If the ``fields`` attribute is ``None``, then the dtype represents N raw bytes. - * If the ``fields`` attribute is not ``None``, then the dtype represents a structured dtype, - - In this check we ensure that ``fields`` is ``None``. - - Parameters - ---------- - dtype : TDType - The dtype to check. - - Returns - ------- - Bool - True if the dtype matches, False otherwise. - """ - return cls.dtype_cls is type(dtype) and dtype.fields is None # type: ignore[has-type] - - def default_scalar(self) -> np.void: - return self.to_native_dtype().type(("\x00" * self.length).encode("ascii")) - - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - return base64.standard_b64encode(self.cast_scalar(data).tobytes()).decode("ascii") - - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: - if check_json_str(data): - return self.to_native_dtype().type(base64.standard_b64decode(data)) - raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover - def _check_scalar(self, data: object) -> bool: return isinstance(data, np.bytes_ | str | bytes | np.void) @@ -258,6 +237,17 @@ def cast_scalar(self, data: object) -> np.void: msg = f"Cannot convert object with type {type(data)} to a numpy void scalar." 
raise TypeError(msg) + def default_scalar(self) -> np.void: + return self.to_native_dtype().type(("\x00" * self.length).encode("ascii")) + + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + return base64.standard_b64encode(self.cast_scalar(data).tobytes()).decode("ascii") + + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: + if check_json_str(data): + return self.to_native_dtype().type(base64.standard_b64decode(data)) + raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover + @property def item_size(self) -> int: return self.length diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index 2df60f930b..bc06af00b5 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -43,26 +43,6 @@ def to_native_dtype(self) -> TComplexDType_co: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] - def to_json(self, zarr_format: ZarrFormat) -> str: - """ - Convert the wrapped data type to a JSON-serializable form. - - Parameters - ---------- - zarr_format : ZarrFormat - The zarr format version. - - Returns - ------- - str - The JSON-serializable representation of the wrapped data type - """ - if zarr_format == 2: - return self.to_native_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ @@ -90,18 +70,38 @@ def from_json_v3(cls, data: JSON) -> Self: msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." raise DataTypeValidationError(msg) + def to_json(self, zarr_format: ZarrFormat) -> str: + """ + Convert the wrapped data type to a JSON-serializable form. 
+ + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + str + The JSON-serializable representation of the wrapped data type + """ + if zarr_format == 2: + return self.to_native_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def _check_scalar(self, data: object) -> TypeGuard[ComplexLike]: return isinstance(data, ComplexLike) + def _cast_scalar_unchecked(self, data: ComplexLike) -> TComplexScalar_co: + return self.to_native_dtype().type(data) # type: ignore[return-value] + def cast_scalar(self, data: object) -> TComplexScalar_co: if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = f"Cannot convert object with type {type(data)} to a numpy float scalar." raise TypeError(msg) - def _cast_scalar_unchecked(self, data: ComplexLike) -> TComplexScalar_co: - return self.to_native_dtype().type(data) # type: ignore[return-value] - def default_scalar(self) -> TComplexScalar_co: """ Get the default value, which is 0 cast to this dtype diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index 60a05326d5..9271886c20 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -23,7 +23,7 @@ float_to_json_v3, get_endianness_from_numpy_dtype, ) -from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, ZDType +from zarr.core.dtype.wrapper import TBaseDType, ZDType @dataclass(frozen=True) @@ -43,36 +43,6 @@ def to_native_dtype(self) -> TFloatDType_co: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] - def to_json(self, zarr_format: ZarrFormat) -> str: - """ - Convert the wrapped data type to a JSON-serializable form. - - Parameters - ---------- - zarr_format : ZarrFormat - The zarr format version. 
- - Returns - ------- - str - The JSON-serializable representation of the wrapped data type - """ - if zarr_format == 2: - return self.to_native_dtype().str - elif zarr_format == 3: - return self._zarr_v3_name - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - if zarr_format == 2: - return cls.from_native_dtype(np.dtype(data)) # type: ignore[arg-type] - elif zarr_format == 3: - return cls() - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: """ @@ -100,18 +70,38 @@ def from_json_v3(cls, data: JSON) -> Self: msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." raise DataTypeValidationError(msg) + def to_json(self, zarr_format: ZarrFormat) -> str: + """ + Convert the wrapped data type to a JSON-serializable form. + + Parameters + ---------- + zarr_format : ZarrFormat + The zarr format version. + + Returns + ------- + str + The JSON-serializable representation of the wrapped data type + """ + if zarr_format == 2: + return self.to_native_dtype().str + elif zarr_format == 3: + return self._zarr_v3_name + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover + def _check_scalar(self, data: object) -> TypeGuard[FloatLike]: return isinstance(data, FloatLike) + def _cast_scalar_unchecked(self, data: FloatLike) -> TFloatScalar_co: + return self.to_native_dtype().type(data) # type: ignore[return-value] + def cast_scalar(self, data: object) -> TFloatScalar_co: if self._check_scalar(data): return self._cast_scalar_unchecked(data) msg = f"Cannot convert object with type {type(data)} to a numpy float scalar." 
raise ScalarTypeValidationError(msg) - def _cast_scalar_unchecked(self, data: FloatLike) -> TFloatScalar_co: - return self.to_native_dtype().type(data) # type: ignore[return-value] - def default_scalar(self) -> TFloatScalar_co: """ Get the default value, which is 0 cast to this dtype diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index 804e9e359a..cddcb26c5e 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -143,6 +143,23 @@ def from_native_dtype(cls, dtype: TBaseDType) -> Self: f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) + def to_native_dtype(self: Self) -> np.dtypes.Int8DType: + return self.dtype_cls() + + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + return cls() + msg = f"Invalid JSON representation of Int8. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls() + msg = f"Invalid JSON representation of Int8. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) + @overload def to_json(self, zarr_format: Literal[2]) -> Literal["|i1"]: ... @@ -169,23 +186,6 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["int8", "|i1"]: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_native_dtype(self: Self) -> np.dtypes.Int8DType: - return self.dtype_cls() - - @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): - return cls() - msg = f"Invalid JSON representation of Int8. 
Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" - raise DataTypeValidationError(msg) - - @classmethod - def from_json_v3(cls, data: JSON) -> Self: - if cls._check_json_v3(data): - return cls() - msg = f"Invalid JSON representation of Int8. Got {data!r}, expected the string {cls._zarr_v3_name!r}" - raise DataTypeValidationError(msg) - @property def item_size(self) -> int: return 1 @@ -208,6 +208,23 @@ def from_native_dtype(cls, dtype: TBaseDType) -> Self: f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) + def to_native_dtype(self: Self) -> np.dtypes.UInt8DType: + return self.dtype_cls() + + @classmethod + def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + if cls._check_json_v2(data, object_codec_id=object_codec_id): + return cls() + msg = f"Invalid JSON representation of UInt8. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" + raise DataTypeValidationError(msg) + + @classmethod + def from_json_v3(cls, data: JSON) -> Self: + if cls._check_json_v3(data): + return cls() + msg = f"Invalid JSON representation of UInt8. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + raise DataTypeValidationError(msg) + @overload def to_json(self, zarr_format: Literal[2]) -> Literal["|u1"]: ... 
@@ -234,34 +251,45 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["uint8", "|u1"]: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def to_native_dtype(self: Self) -> np.dtypes.UInt8DType: - return self.dtype_cls() + @property + def item_size(self) -> int: + return 1 + + +@dataclass(frozen=True, kw_only=True) +class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): + dtype_cls = np.dtypes.Int16DType + _zarr_v3_name: ClassVar[Literal["int16"]] = "int16" + _zarr_v2_names: ClassVar[tuple[Literal[">i2"], Literal["i2", " Self: + if cls._check_native_dtype(dtype): + return cls(endianness=get_endianness_from_numpy_dtype(dtype)) + raise DataTypeValidationError( + f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" + ) + + def to_native_dtype(self) -> np.dtypes.Int16DType: + byte_order = endianness_to_numpy_str(self.endianness) + return self.dtype_cls().newbyteorder(byte_order) @classmethod def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: if cls._check_json_v2(data, object_codec_id=object_codec_id): - return cls() - msg = f"Invalid JSON representation of UInt8. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" + # Going via numpy ensures that we get the endianness correct without + # annoying string parsing. + return cls.from_native_dtype(np.dtype(data)) + msg = f"Invalid JSON representation of Int16. Got {data!r}, expected one of the strings {cls._zarr_v2_names!r}." raise DataTypeValidationError(msg) @classmethod def from_json_v3(cls, data: JSON) -> Self: if cls._check_json_v3(data): return cls() - msg = f"Invalid JSON representation of UInt8. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + msg = f"Invalid JSON representation of Int16. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @property - def item_size(self) -> int: - return 1 - - -@dataclass(frozen=True, kw_only=True) -class Int16(BaseInt[np.dtypes.Int16DType, np.int16], HasEndianness): - dtype_cls = np.dtypes.Int16DType - _zarr_v3_name: ClassVar[Literal["int16"]] = "int16" - _zarr_v2_names: ClassVar[tuple[Literal[">i2"], Literal["i2", " Literal[">i2", " Literal["int16", ">i2", " int: + return 2 + + +@dataclass(frozen=True, kw_only=True) +class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): + dtype_cls = np.dtypes.UInt16DType + _zarr_v3_name: ClassVar[Literal["uint16"]] = "uint16" + _zarr_v2_names: ClassVar[tuple[Literal[">u2"], Literal["u2", " Self: if cls._check_native_dtype(dtype): @@ -296,7 +335,7 @@ def from_native_dtype(cls, dtype: TBaseDType) -> Self: f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) - def to_native_dtype(self) -> np.dtypes.Int16DType: + def to_native_dtype(self) -> np.dtypes.UInt16DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @@ -306,27 +345,16 @@ def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self # Going via numpy ensures that we get the endianness correct without # annoying string parsing. return cls.from_native_dtype(np.dtype(data)) - msg = f"Invalid JSON representation of Int16. Got {data!r}, expected one of the strings {cls._zarr_v2_names!r}." + msg = f"Invalid JSON representation of UInt16. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def from_json_v3(cls, data: JSON) -> Self: if cls._check_json_v3(data): return cls() - msg = f"Invalid JSON representation of Int16. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + msg = f"Invalid JSON representation of UInt16. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @property - def item_size(self) -> int: - return 2 - - -@dataclass(frozen=True, kw_only=True) -class UInt16(BaseInt[np.dtypes.UInt16DType, np.uint16], HasEndianness): - dtype_cls = np.dtypes.UInt16DType - _zarr_v3_name: ClassVar[Literal["uint16"]] = "uint16" - _zarr_v2_names: ClassVar[tuple[Literal[">u2"], Literal["u2", " Literal[">u2", " Literal["uint16", ">u2", " int: + return 2 + + +@dataclass(frozen=True, kw_only=True) +class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): + dtype_cls = np.dtypes.Int32DType + _zarr_v3_name: ClassVar[Literal["int32"]] = "int32" + _zarr_v2_names: ClassVar[tuple[Literal[">i4"], Literal["i4", " Self: + def from_native_dtype(cls: type[Self], dtype: TBaseDType) -> Self: if cls._check_native_dtype(dtype): return cls(endianness=get_endianness_from_numpy_dtype(dtype)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) - def to_native_dtype(self) -> np.dtypes.UInt16DType: + def to_native_dtype(self) -> np.dtypes.Int32DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @@ -371,27 +410,16 @@ def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self # Going via numpy ensures that we get the endianness correct without # annoying string parsing. return cls.from_native_dtype(np.dtype(data)) - msg = f"Invalid JSON representation of UInt16. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def from_json_v3(cls, data: JSON) -> Self: if cls._check_json_v3(data): return cls() - msg = f"Invalid JSON representation of UInt16. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @property - def item_size(self) -> int: - return 2 - - -@dataclass(frozen=True, kw_only=True) -class Int32(BaseInt[np.dtypes.Int32DType, np.int32], HasEndianness): - dtype_cls = np.dtypes.Int32DType - _zarr_v3_name: ClassVar[Literal["int32"]] = "int32" - _zarr_v2_names: ClassVar[tuple[Literal[">i4"], Literal["i4", " Literal[">i4", " Literal["int32", ">i4", " int: + return 4 + + +@dataclass(frozen=True, kw_only=True) +class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): + dtype_cls = np.dtypes.UInt32DType + _zarr_v3_name: ClassVar[Literal["uint32"]] = "uint32" + _zarr_v2_names: ClassVar[tuple[Literal[">u4"], Literal["u4", " Self: + def from_native_dtype(cls, dtype: TBaseDType) -> Self: if cls._check_native_dtype(dtype): return cls(endianness=get_endianness_from_numpy_dtype(dtype)) raise DataTypeValidationError( f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) - def to_native_dtype(self) -> np.dtypes.Int32DType: + def to_native_dtype(self) -> np.dtypes.UInt32DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @@ -436,27 +475,16 @@ def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self # Going via numpy ensures that we get the endianness correct without # annoying string parsing. return cls.from_native_dtype(np.dtype(data)) - msg = f"Invalid JSON representation of Int32. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def from_json_v3(cls, data: JSON) -> Self: if cls._check_json_v3(data): return cls() - msg = f"Invalid JSON representation of Int32. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @property - def item_size(self) -> int: - return 4 - - -@dataclass(frozen=True, kw_only=True) -class UInt32(BaseInt[np.dtypes.UInt32DType, np.uint32], HasEndianness): - dtype_cls = np.dtypes.UInt32DType - _zarr_v3_name: ClassVar[Literal["uint32"]] = "uint32" - _zarr_v2_names: ClassVar[tuple[Literal[">u4"], Literal["u4", " Literal[">u4", " Literal["uint32", ">u4", " int: + return 4 + + +@dataclass(frozen=True, kw_only=True) +class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): + dtype_cls = np.dtypes.Int64DType + _zarr_v3_name: ClassVar[Literal["int64"]] = "int64" + _zarr_v2_names: ClassVar[tuple[Literal[">i8"], Literal["i8", " Self: if cls._check_native_dtype(dtype): @@ -489,7 +528,7 @@ def from_native_dtype(cls, dtype: TBaseDType) -> Self: f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) - def to_native_dtype(self) -> np.dtypes.UInt32DType: + def to_native_dtype(self) -> np.dtypes.Int64DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @@ -499,27 +538,16 @@ def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self # Going via numpy ensures that we get the endianness correct without # annoying string parsing. return cls.from_native_dtype(np.dtype(data)) - msg = f"Invalid JSON representation of UInt32. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def from_json_v3(cls, data: JSON) -> Self: if cls._check_json_v3(data): return cls() - msg = f"Invalid JSON representation of UInt32. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @property - def item_size(self) -> int: - return 4 - - -@dataclass(frozen=True, kw_only=True) -class Int64(BaseInt[np.dtypes.Int64DType, np.int64], HasEndianness): - dtype_cls = np.dtypes.Int64DType - _zarr_v3_name: ClassVar[Literal["int64"]] = "int64" - _zarr_v2_names: ClassVar[tuple[Literal[">i8"], Literal["i8", " Literal[">i8", " Literal["int64", ">i8", " Self: - if cls._check_native_dtype(dtype): - return cls(endianness=get_endianness_from_numpy_dtype(dtype)) - raise DataTypeValidationError( - f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" - ) + @property + def item_size(self) -> int: + return 8 - def to_native_dtype(self) -> np.dtypes.Int64DType: + +@dataclass(frozen=True, kw_only=True) +class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): + dtype_cls = np.dtypes.UInt64DType + _zarr_v3_name: ClassVar[Literal["uint64"]] = "uint64" + _zarr_v2_names: ClassVar[tuple[Literal[">u8"], Literal["u8", " np.dtypes.UInt64DType: byte_order = endianness_to_numpy_str(self.endianness) return self.dtype_cls().newbyteorder(byte_order) @@ -562,27 +593,16 @@ def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self # Going via numpy ensures that we get the endianness correct without # annoying string parsing. return cls.from_native_dtype(np.dtype(data)) - msg = f"Invalid JSON representation of Int64. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod def from_json_v3(cls, data: JSON) -> Self: if cls._check_json_v3(data): return cls() - msg = f"Invalid JSON representation of Int64. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @property - def item_size(self) -> int: - return 8 - - -@dataclass(frozen=True, kw_only=True) -class UInt64(BaseInt[np.dtypes.UInt64DType, np.uint64], HasEndianness): - dtype_cls = np.dtypes.UInt64DType - _zarr_v3_name: ClassVar[Literal["uint64"]] = "uint64" - _zarr_v2_names: ClassVar[tuple[Literal[">u8"], Literal["u8", " Literal[">u8", " Self: f"Invalid data type: {dtype}. Expected an instance of {cls.dtype_cls}" ) - def to_native_dtype(self) -> np.dtypes.UInt64DType: - byte_order = endianness_to_numpy_str(self.endianness) - return self.dtype_cls().newbyteorder(byte_order) - - @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): - # Going via numpy ensures that we get the endianness correct without - # annoying string parsing. - return cls.from_native_dtype(np.dtype(data)) - msg = f"Invalid JSON representation of UInt64. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." - raise DataTypeValidationError(msg) - - @classmethod - def from_json_v3(cls, data: JSON) -> Self: - if cls._check_json_v3(data): - return cls() - msg = f"Invalid JSON representation of UInt64. Got {data!r}, expected the string {cls._zarr_v3_name!r}" - raise DataTypeValidationError(msg) - @property def item_size(self) -> int: return 8 diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index b6196b7fed..190647c1e1 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -67,25 +67,6 @@ def to_native_dtype(self) -> np.dtypes.VoidDType[int]: np.dtype([(key, dtype.to_native_dtype()) for (key, dtype) in self.fields]), ) - @overload - def to_json(self, zarr_format: Literal[2]) -> DTypeJSON_V2: ... 
- - @overload - def to_json(self, zarr_format: Literal[3]) -> DTypeJSON_V3: ... - - def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V3 | DTypeJSON_V2: - fields = [ - (f_name, f_dtype.to_json(zarr_format=zarr_format)) for f_name, f_dtype in self.fields - ] - if zarr_format == 2: - return fields - elif zarr_format == 3: - v3_unstable_dtype_warning(self) - base_dict = {"name": self._zarr_v3_name} - base_dict["configuration"] = {"fields": fields} # type: ignore[assignment] - return cast("DTypeJSON_V3", base_dict) - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod def _check_json_v2( cls, data: JSON, *, object_codec_id: str | None = None @@ -148,21 +129,28 @@ def from_json_v3(cls, data: JSON) -> Self: msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON object with the key {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: - return bytes_to_json(self.cast_scalar(data).tobytes(), zarr_format) + @overload + def to_json(self, zarr_format: Literal[2]) -> DTypeJSON_V2: ... - def _check_scalar(self, data: object) -> TypeGuard[StructuredScalarLike]: - # TODO: implement something here! - return True + @overload + def to_json(self, zarr_format: Literal[3]) -> DTypeJSON_V3: ... 
- def default_scalar(self) -> np.void: - return self._cast_scalar_unchecked(0) + def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V3 | DTypeJSON_V2: + fields = [ + (f_name, f_dtype.to_json(zarr_format=zarr_format)) for f_name, f_dtype in self.fields + ] + if zarr_format == 2: + return fields + elif zarr_format == 3: + v3_unstable_dtype_warning(self) + base_dict = {"name": self._zarr_v3_name} + base_dict["configuration"] = {"fields": fields} # type: ignore[assignment] + return cast("DTypeJSON_V3", base_dict) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - def cast_scalar(self, data: object) -> np.void: - if self._check_scalar(data): - return self._cast_scalar_unchecked(data) - msg = f"Cannot convert object with type {type(data)} to a numpy structured scalar." - raise TypeError(msg) + def _check_scalar(self, data: object) -> TypeGuard[StructuredScalarLike]: + # TODO: implement something more precise here! + return isinstance(data, (bytes, list, tuple, int)) def _cast_scalar_unchecked(self, data: StructuredScalarLike) -> np.void: na_dtype = self.to_native_dtype() @@ -174,6 +162,15 @@ def _cast_scalar_unchecked(self, data: StructuredScalarLike) -> np.void: res = np.array([data], dtype=na_dtype)[0] return cast("np.void", res) + def cast_scalar(self, data: object) -> np.void: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy structured scalar." + raise TypeError(msg) + + def default_scalar(self) -> np.void: + return self._cast_scalar_unchecked(0) + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: if check_json_str(data): as_bytes = bytes_from_json(data, zarr_format=zarr_format) @@ -181,6 +178,9 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.void: return cast("np.void", np.array([as_bytes]).view(dtype)[0]) raise TypeError(f"Invalid type: {data}. 
Expected a string.") + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str: + return bytes_to_json(self.cast_scalar(data).tobytes(), zarr_format) + @property def item_size(self) -> int: # Lets have numpy do the arithmetic here diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index dd4f3840b1..a99703dd3e 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -184,28 +184,6 @@ class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], Has scale_factor: int = 1 unit: DateTimeUnit = "generic" - def default_scalar(self) -> np.timedelta64: - return np.timedelta64("NaT") - - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: - if check_json_time(data): - return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") - raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover - - def _check_scalar(self, data: object) -> TypeGuard[TimeDeltaLike]: - if data is None: - return True - return isinstance(data, str | int | bytes | np.timedelta64 | timedelta) - - def _cast_scalar_unchecked(self, data: TimeDeltaLike) -> np.timedelta64: - return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") - - def cast_scalar(self, data: object) -> np.timedelta64: - if self._check_scalar(data): - return self._cast_scalar_unchecked(data) - msg = f"Cannot convert object with type {type(data)} to a numpy timedelta64 scalar." 
- raise TypeError(msg) - @classmethod def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: # match m[M], etc @@ -255,52 +233,37 @@ def from_json_v3(cls, data: JSON) -> Self: ) raise DataTypeValidationError(msg) - -@dataclass(frozen=True, kw_only=True, slots=True) -class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): - dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] - _zarr_v3_name: ClassVar[Literal["numpy.datetime64"]] = "numpy.datetime64" - _zarr_v2_names = (">M8", " np.datetime64: - return np.datetime64("NaT") - - def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: - if check_json_time(data): - return self._cast_scalar_unchecked(data) - raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover - - def _check_scalar(self, data: object) -> TypeGuard[DateTimeLike]: + def _check_scalar(self, data: object) -> TypeGuard[TimeDeltaLike]: if data is None: return True - return isinstance(data, str | int | bytes | np.datetime64 | datetime) + return isinstance(data, str | int | bytes | np.timedelta64 | timedelta) - def _cast_scalar_unchecked(self, data: DateTimeLike) -> np.datetime64: + def _cast_scalar_unchecked(self, data: TimeDeltaLike) -> np.timedelta64: return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") - def cast_scalar(self, data: object) -> np.datetime64: + def cast_scalar(self, data: object) -> np.timedelta64: if self._check_scalar(data): return self._cast_scalar_unchecked(data) - msg = f"Cannot convert object with type {type(data)} to a numpy datetime scalar." + msg = f"Cannot convert object with type {type(data)} to a numpy timedelta64 scalar." 
raise TypeError(msg) - @classmethod - def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: - # match M[M], etc - # consider making this a standalone function - if not isinstance(data, str): - return False - if not data.startswith(cls._zarr_v2_names): - return False - if len(data) == 3: - # no unit, and - # we already checked that this string is either M8 - return True - else: - return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" + def default_scalar(self) -> np.timedelta64: + return np.timedelta64("NaT") + + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.timedelta64: + if check_json_time(data): + return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") + raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover + + +@dataclass(frozen=True, kw_only=True, slots=True) +class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEndianness): + dtype_cls = np.dtypes.DateTime64DType # type: ignore[assignment] + _zarr_v3_name: ClassVar[Literal["numpy.datetime64"]] = "numpy.datetime64" + _zarr_v2_names = (">M8", " TypeGuard[DateTime64JSONV3]: @@ -335,3 +298,43 @@ def from_json_v3(cls, data: JSON) -> Self: "'scale_factor' key" ) raise DataTypeValidationError(msg) + + def _check_scalar(self, data: object) -> TypeGuard[DateTimeLike]: + if data is None: + return True + return isinstance(data, str | int | bytes | np.datetime64 | datetime) + + def _cast_scalar_unchecked(self, data: DateTimeLike) -> np.datetime64: + return self.to_native_dtype().type(data, f"{self.scale_factor}{self.unit}") + + def cast_scalar(self, data: object) -> np.datetime64: + if self._check_scalar(data): + return self._cast_scalar_unchecked(data) + msg = f"Cannot convert object with type {type(data)} to a numpy datetime scalar." 
+ raise TypeError(msg) + + def default_scalar(self) -> np.datetime64: + return np.datetime64("NaT") + + def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetime64: + if check_json_time(data): + return self._cast_scalar_unchecked(data) + raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover + + @classmethod + def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + """ + Check that JSON input is a string representation of a NumPy datetime64 data type, like "M8[10s]". This function can be used as a type guard to narrow the type of unknown JSON + input. + """ + if not isinstance(data, str): + return False + if not data.startswith(cls._zarr_v2_names): + return False + if len(data) == 3: + # no unit, and + # we already checked that this string is either M8 + return True + else: + return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index 557da87fcf..fa34dc000d 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -138,89 +138,75 @@ def to_native_dtype(self: Self) -> TDType_co: """ ... + @classmethod @abstractmethod - def cast_scalar(self, data: object) -> TScalar_co: + def _check_json_v2( + cls: type[Self], data: JSON, *, object_codec_id: str | None = None + ) -> TypeGuard[DTypeJSON_V2]: """ - Cast a python object to the wrapped scalar type. - The type of the provided scalar is first checked for compatibility. - If it's incompatible with the associated scalar type, a ``TypeError`` will be raised. + Check that JSON data matches the Zarr V2 JSON serialization of this ZDType. Parameters ---------- - data : object - The python object to cast. - - Returns - ------- - TScalar - The cast value. - """ + data : JSON + The JSON representation of the data type. 
- @abstractmethod - def _check_scalar(self, data: object) -> bool: - """ - Check that an python object is a valid scalar value for the wrapped data type. + object_codec_id : str | None + The string identifier of an object codec, if applicable. Object codecs are specific + numcodecs codecs that zarr-python 2.x used to serialize numpy "Object" scalars. + For example, a dtype field set to ``"|O"`` with an object codec ID of "vlen-utf8" + indicates that the data type is a variable-length string. - Parameters - ---------- - data : object - A value to check. + Zarr V3 has no such logic, so this parameter is only used for Zarr V2 compatibility. Returns ------- Bool - True if the object is valid, False otherwise. + True if the JSON representation matches this data type, False otherwise. """ ... + @classmethod @abstractmethod - def default_scalar(self) -> TScalar_co: + def _check_json_v3(cls: type[Self], data: JSON) -> TypeGuard[DTypeJSON_V3]: """ - Get the default scalar value for the wrapped data type. This is a method, rather than an - attribute, because the default value for some data types depends on parameters that are - not known until a concrete data type is wrapped. For example, data types parametrized by a - length like fixed-length strings or bytes will generate scalars consistent with that length. + Check that JSON data matches the Zarr V3 JSON serialization of this ZDType. + + Parameters + ---------- + data : JSON + The JSON representation of the data type. Returns ------- - TScalar - The default value for this data type. + Bool + True if the JSON representation matches, False otherwise. """ ... @classmethod @abstractmethod - def _check_json_v2( - cls: type[Self], data: JSON, *, object_codec_id: str | None = None - ) -> TypeGuard[DTypeJSON_V2]: + def from_json_v2(cls: type[Self], data: JSON, *, object_codec_id: str | None = None) -> Self: """ - Check that JSON data matches the Zarr V2 JSON serialization of this ZDType. 
+ Create an instance of this ZDType from Zarr V2 JSON data. Parameters ---------- data : JSON The JSON representation of the data type. - object_codec_id : str | None - The string identifier of an object codec, if applicable. Object codecs are specific - numcodecs codecs that zarr-python 2.x used to serialize numpy "Object" scalars. - For example, a dtype field set to ``"|O"`` with an object codec ID of "vlen-utf8" - indicates that the data type is a variable-length string. - - Zarr V3 has no such logic, so this parameter is only used for Zarr V2 compatibility. - Returns ------- - Bool - True if the JSON representation matches this data type, False otherwise. + Self + The wrapped data type. """ ... @classmethod @abstractmethod - def _check_json_v3(cls: type[Self], data: JSON) -> TypeGuard[DTypeJSON_V3]: + def from_json_v3(cls: type[Self], data: JSON) -> Self: """ - Check that JSON data matches the Zarr V3 JSON serialization of this ZDType. + Create an instance of this ZDType from Zarr V3 JSON data. Parameters ---------- @@ -229,8 +215,8 @@ def _check_json_v3(cls: type[Self], data: JSON) -> TypeGuard[DTypeJSON_V3]: Returns ------- - Bool - True if the JSON representation matches, False otherwise. + Self + The wrapped data type. """ ... @@ -257,80 +243,94 @@ def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V2 | DTypeJSON_V3: """ ... - @classmethod @abstractmethod - def from_json_v3(cls: type[Self], data: JSON) -> Self: + def _check_scalar(self, data: object) -> bool: """ - Create an instance of this ZDType from Zarr V3 JSON data. + Check that an python object is a valid scalar value for the wrapped data type. Parameters ---------- - data : JSON - The JSON representation of the data type. + data : object + A value to check. Returns ------- - Self - The wrapped data type. + Bool + True if the object is valid, False otherwise. """ ... 
- @classmethod @abstractmethod - def from_json_v2(cls: type[Self], data: JSON, *, object_codec_id: str | None = None) -> Self: + def cast_scalar(self, data: object) -> TScalar_co: """ - Create an instance of this ZDType from Zarr V2 JSON data. + Cast a python object to the wrapped scalar type. + The type of the provided scalar is first checked for compatibility. + If it's incompatible with the associated scalar type, a ``TypeError`` will be raised. Parameters ---------- - data : JSON - The JSON representation of the data type. + data : object + The python object to cast. Returns ------- - Self - The wrapped data type. + TScalar + The cast value. + """ + + @abstractmethod + def default_scalar(self) -> TScalar_co: + """ + Get the default scalar value for the wrapped data type. This is a method, rather than an + attribute, because the default value for some data types depends on parameters that are + not known until a concrete data type is wrapped. For example, data types parametrized by a + length like fixed-length strings or bytes will generate scalars consistent with that length. + + Returns + ------- + TScalar + The default value for this data type. """ ... @abstractmethod - def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> JSON: + def from_json_scalar(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar_co: """ - Serialize a python object to the JSON representation of a scalar. The value will first be - cast to the scalar type associated with this ZDType, then serialized to JSON. + Read a JSON-serializable value as a scalar. Parameters ---------- - data : object - The value to convert. + data : JSON + A JSON representation of a scalar value. zarr_format : ZarrFormat The zarr format version. This is specified because the JSON serialization of scalars differs between Zarr V2 and Zarr V3. Returns ------- - JSON - The JSON-serialized scalar. + TScalar + The deserialized scalar value. """ ... 
@abstractmethod - def from_json_scalar(self: Self, data: JSON, *, zarr_format: ZarrFormat) -> TScalar_co: + def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> JSON: """ - Read a JSON-serializable value as a scalar. + Serialize a python object to the JSON representation of a scalar. The value will first be + cast to the scalar type associated with this ZDType, then serialized to JSON. Parameters ---------- - data : JSON - A JSON representation of a scalar value. + data : object + The value to convert. zarr_format : ZarrFormat The zarr format version. This is specified because the JSON serialization of scalars differs between Zarr V2 and Zarr V3. Returns ------- - TScalar - The deserialized scalar value. + JSON + The JSON-serialized scalar. """ ... From d26b695ae27e974349cc95f5e0368abb6dcb1fcb Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sat, 14 Jun 2025 15:28:34 +0300 Subject: [PATCH 128/130] allow structured scalars to be np.void --- src/zarr/core/dtype/npy/structured.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index 190647c1e1..66dfed87f6 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -150,7 +150,7 @@ def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V3 | DTypeJSON_V2: def _check_scalar(self, data: object) -> TypeGuard[StructuredScalarLike]: # TODO: implement something more precise here! 
- return isinstance(data, (bytes, list, tuple, int)) + return isinstance(data, (bytes, list, tuple, int, np.void)) def _cast_scalar_unchecked(self, data: StructuredScalarLike) -> np.void: na_dtype = self.to_native_dtype() From 49f00622ecf5d5483c8b4e58c17bffde95d449d3 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 15 Jun 2025 23:09:42 +0200 Subject: [PATCH 129/130] use a common function signature for from_json by packing the object_codec_id in a typeddict for zarr v2 metadata --- src/zarr/core/dtype/__init__.py | 27 +-- src/zarr/core/dtype/common.py | 135 ++++++++++++- src/zarr/core/dtype/npy/bool.py | 43 +++-- src/zarr/core/dtype/npy/bytes.py | 101 ++++++---- src/zarr/core/dtype/npy/complex.py | 46 +++-- src/zarr/core/dtype/npy/float.py | 46 +++-- src/zarr/core/dtype/npy/int.py | 190 ++++++++++++------- src/zarr/core/dtype/npy/string.py | 76 ++++---- src/zarr/core/dtype/npy/structured.py | 87 +++++---- src/zarr/core/dtype/npy/time.py | 91 +++++---- src/zarr/core/dtype/registry.py | 29 +-- src/zarr/core/dtype/wrapper.py | 89 ++------- src/zarr/core/metadata/v2.py | 81 +++++--- src/zarr/core/metadata/v3.py | 11 +- tests/package_with_entrypoint/__init__.py | 37 ++-- tests/test_dtype/test_npy/test_bool.py | 4 +- tests/test_dtype/test_npy/test_bytes.py | 12 +- tests/test_dtype/test_npy/test_complex.py | 12 +- tests/test_dtype/test_npy/test_float.py | 17 +- tests/test_dtype/test_npy/test_int.py | 36 +++- tests/test_dtype/test_npy/test_string.py | 11 +- tests/test_dtype/test_npy/test_structured.py | 18 +- tests/test_dtype/test_npy/test_time.py | 18 +- tests/test_dtype/test_wrapper.py | 26 +-- tests/test_dtype_registry.py | 44 ++--- tests/test_group.py | 5 +- 26 files changed, 801 insertions(+), 491 deletions(-) diff --git a/src/zarr/core/dtype/__init__.py b/src/zarr/core/dtype/__init__.py index 25e5163e43..735690d4bc 100644 --- a/src/zarr/core/dtype/__init__.py +++ b/src/zarr/core/dtype/__init__.py @@ -2,7 +2,10 @@ from typing import TYPE_CHECKING, Final, 
TypeAlias -from zarr.core.dtype.common import DataTypeValidationError +from zarr.core.dtype.common import ( + DataTypeValidationError, + DTypeJSON, +) from zarr.core.dtype.npy.bool import Bool from zarr.core.dtype.npy.bytes import NullTerminatedBytes, RawBytes, VariableLengthBytes from zarr.core.dtype.npy.complex import Complex64, Complex128 @@ -131,20 +134,20 @@ def get_data_type_from_native_dtype(dtype: npt.DTypeLike) -> ZDType[TBaseDType, return data_type_registry.match_dtype(dtype=na_dtype) -def get_data_type_from_json_v3( - dtype_spec: JSON, -) -> ZDType[TBaseDType, TBaseScalar]: - return data_type_registry.match_json_v3(dtype_spec) - - -def get_data_type_from_json_v2( - dtype_spec: JSON, *, object_codec_id: str | None = None +def get_data_type_from_json( + dtype_spec: DTypeJSON, *, zarr_format: ZarrFormat ) -> ZDType[TBaseDType, TBaseScalar]: - return data_type_registry.match_json_v2(dtype_spec, object_codec_id=object_codec_id) + """ + Given a JSON representation of a data type and a Zarr format version, + attempt to create a ZDType instance from the registered ZDType classes. + """ + return data_type_registry.match_json(dtype_spec, zarr_format=zarr_format) def parse_data_type( - dtype_spec: ZDTypeLike, *, zarr_format: ZarrFormat, object_codec_id: str | None = None + dtype_spec: ZDTypeLike, + *, + zarr_format: ZarrFormat, ) -> ZDType[TBaseDType, TBaseScalar]: """ Interpret the input as a ZDType instance. 
@@ -153,7 +156,7 @@ def parse_data_type( return dtype_spec # dict and zarr_format 3 means that we have a JSON object representation of the dtype if zarr_format == 3 and isinstance(dtype_spec, Mapping): - return get_data_type_from_json_v3(dtype_spec) # type: ignore[arg-type] + return get_data_type_from_json(dtype_spec, zarr_format=3) # otherwise, we have either a numpy dtype string, or a zarr v3 dtype string, and in either case # we can create a numpy dtype from it, and do the dtype inference from that return get_data_type_from_native_dtype(dtype_spec) # type: ignore[arg-type] diff --git a/src/zarr/core/dtype/common.py b/src/zarr/core/dtype/common.py index 9fabfa2737..6f61b6775e 100644 --- a/src/zarr/core/dtype/common.py +++ b/src/zarr/core/dtype/common.py @@ -1,16 +1,149 @@ from __future__ import annotations import warnings +from collections.abc import Mapping, Sequence from dataclasses import dataclass -from typing import ClassVar, Final, Literal +from typing import ( + ClassVar, + Final, + Generic, + Literal, + TypedDict, + TypeGuard, + TypeVar, +) + +from zarr.core.common import NamedConfig EndiannessStr = Literal["little", "big"] ENDIANNESS_STR: Final = "little", "big" + SpecialFloatStrings = Literal["NaN", "Infinity", "-Infinity"] SPECIAL_FLOAT_STRINGS: Final = ("NaN", "Infinity", "-Infinity") + JSONFloatV2 = float | SpecialFloatStrings JSONFloatV3 = float | SpecialFloatStrings | str +ObjectCodecID = Literal["vlen-utf8", "vlen-bytes", "vlen-array", "pickle", "json2", "msgpack2"] +# These are the ids of the known object codecs for zarr v2. 
+OBJECT_CODEC_IDS: Final = ("vlen-utf8", "vlen-bytes", "vlen-array", "pickle", "json2", "msgpack2") + +# This is a wider type than our standard JSON type because we need +# to work with typeddict objects which are assignable to Mapping[str, object] +DTypeJSON = str | int | float | Sequence["DTypeJSON"] | None | Mapping[str, object] + +# The DTypeJSON_V2 type exists because ZDType.from_json takes a single argument, which must contain +# all the information necessary to decode the data type. Zarr v2 supports multiple distinct +# data types that all used the "|O" data type identifier. These data types can only be +# discriminated on the basis of their "object codec", i.e. a special data type specific +# compressor or filter. So to figure out what data type a zarr v2 array has, we need the +# data type identifier from metadata, as well as an object codec id if the data type identifier +# is "|O". +# So we will pack the name of the dtype alongside the name of the object codec id, if applicable, +# in a single dict, and pass that to the data type inference logic. +# These type variables have a very wide bound because the individual zdtype +# classes can perform a very specific type check. + +# This is the JSON representation of a structured dtype in zarr v2 +StructuredName_V2 = Sequence["str | StructuredName_V2"] + +# This models the type of the name a dtype might have in zarr v2 array metadata +DTypeName_V2 = StructuredName_V2 | str + +TDTypeNameV2_co = TypeVar("TDTypeNameV2_co", bound=DTypeName_V2, covariant=True) +TObjectCodecID_co = TypeVar("TObjectCodecID_co", bound=None | str, covariant=True) + + +class DTypeConfig_V2(TypedDict, Generic[TDTypeNameV2_co, TObjectCodecID_co]): + name: TDTypeNameV2_co + object_codec_id: TObjectCodecID_co + + +DTypeSpec_V2 = DTypeConfig_V2[DTypeName_V2, None | str] + + +def check_structured_dtype_v2_inner(data: object) -> TypeGuard[StructuredName_V2]: + """ + A type guard for the inner elements of a structured dtype. 
This is a recursive check because + the type is itself recursive. + + This check ensures that all the elements are 2-element sequences beginning with a string + and ending with either another string or another 2-element sequence beginning with a string and + ending with another instance of that type. + """ + if isinstance(data, (str, Mapping)): + return False + if not isinstance(data, Sequence): + return False + if len(data) != 2: + return False + if not (isinstance(data[0], str)): + return False + if isinstance(data[-1], str): + return True + elif isinstance(data[-1], Sequence): + return check_structured_dtype_v2_inner(data[-1]) + return False + + +def check_structured_dtype_name_v2(data: Sequence[object]) -> TypeGuard[StructuredName_V2]: + return all(check_structured_dtype_v2_inner(d) for d in data) + + +def check_dtype_name_v2(data: object) -> TypeGuard[DTypeName_V2]: + """ + Type guard for narrowing the type of a python object to an valid zarr v2 dtype name. + """ + if isinstance(data, str): + return True + elif isinstance(data, Sequence): + return check_structured_dtype_name_v2(data) + return False + + +def check_dtype_spec_v2(data: object) -> TypeGuard[DTypeSpec_V2]: + """ + Type guard for narrowing a python object to an instance of DTypeSpec_V2 + """ + if not isinstance(data, Mapping): + return False + if set(data.keys()) != {"name", "object_codec_id"}: + return False + if not check_dtype_name_v2(data["name"]): + return False + return isinstance(data["object_codec_id"], str | None) + + +# By comparison, The JSON representation of a dtype in zarr v3 is much simpler. 
+# It's either a string, or a structured dict +DTypeSpec_V3 = str | NamedConfig[str, Mapping[str, object]] + + +def check_dtype_spec_v3(data: object) -> TypeGuard[DTypeSpec_V3]: + """ + Type guard for narrowing the type of a python object to an instance of + DTypeSpec_V3, i.e either a string or a dict with a "name" field that's a string and a + "configuration" field that's a mapping with string keys. + """ + if isinstance(data, str) or ( # noqa: SIM103 + isinstance(data, Mapping) + and set(data.keys()) == {"name", "configuration"} + and isinstance(data["configuration"], Mapping) + and all(isinstance(k, str) for k in data["configuration"]) + ): + return True + return False + + +def unpack_dtype_json(data: DTypeSpec_V2 | DTypeSpec_V3) -> DTypeJSON: + """ + Return the array metadata form of the dtype JSON representation. For the Zarr V3 form of dtype + metadata, this is a no-op. For the Zarr V2 form of dtype metadata, this unpacks the dtype name. + """ + if isinstance(data, Mapping) and set(data.keys()) == {"name", "object_codec_id"}: + return data["name"] + return data + class DataTypeValidationError(ValueError): ... 
diff --git a/src/zarr/core/dtype/npy/bool.py b/src/zarr/core/dtype/npy/bool.py index 7570dd1f4f..d8d52468bf 100644 --- a/src/zarr/core/dtype/npy/bool.py +++ b/src/zarr/core/dtype/npy/bool.py @@ -1,12 +1,22 @@ +from __future__ import annotations + from dataclasses import dataclass -from typing import ClassVar, Literal, Self, TypeGuard, overload +from typing import TYPE_CHECKING, ClassVar, Literal, Self, TypeGuard, overload import numpy as np -from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import DataTypeValidationError, HasItemSize +from zarr.core.dtype.common import ( + DataTypeValidationError, + DTypeConfig_V2, + DTypeJSON, + HasItemSize, + check_dtype_spec_v2, +) from zarr.core.dtype.wrapper import TBaseDType, ZDType +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + @dataclass(frozen=True, kw_only=True, slots=True) class Bool(ZDType[np.dtypes.BoolDType, np.bool_], HasItemSize): @@ -44,40 +54,47 @@ def to_native_dtype(self: Self) -> np.dtypes.BoolDType: @classmethod def _check_json_v2( - cls, data: JSON, *, object_codec_id: str | None = None - ) -> TypeGuard[Literal["|b1"]]: + cls, + data: DTypeJSON, + ) -> TypeGuard[DTypeConfig_V2[Literal["|b1"], None]]: """ Check that the input is a valid JSON representation of a Bool. """ - return data == cls._zarr_v2_name + return ( + check_dtype_spec_v2(data) + and data["name"] == cls._zarr_v2_name + and data["object_codec_id"] is None + ) @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["bool"]]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["bool"]]: return data == cls._zarr_v3_name @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + def _from_json_v2(cls, data: DTypeJSON) -> Self: if cls._check_json_v2(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}, expected the string {cls._zarr_v2_name!r}" raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls: type[Self], data: JSON) -> Self: + def _from_json_v3(cls: type[Self], data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal["|b1"]: ... + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal["|b1"], None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["bool"]: ... - def to_json(self, zarr_format: ZarrFormat) -> Literal["|b1", "bool"]: + def to_json( + self, zarr_format: ZarrFormat + ) -> DTypeConfig_V2[Literal["|b1"], None] | Literal["bool"]: if zarr_format == 2: - return self._zarr_v2_name + return {"name": self._zarr_v2_name, "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/core/dtype/npy/bytes.py b/src/zarr/core/dtype/npy/bytes.py index 9dc0bb1a68..e363c75053 100644 --- a/src/zarr/core/dtype/npy/bytes.py +++ b/src/zarr/core/dtype/npy/bytes.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import base64 import re from dataclasses import dataclass @@ -8,9 +10,12 @@ from zarr.core.common import JSON, NamedConfig, ZarrFormat from zarr.core.dtype.common import ( DataTypeValidationError, + DTypeConfig_V2, + DTypeJSON, HasItemSize, HasLength, HasObjectCodec, + check_dtype_spec_v2, v3_unstable_dtype_warning, ) from zarr.core.dtype.npy.common import check_json_str @@ -44,15 +49,20 @@ def to_native_dtype(self) -> np.dtypes.BytesDType[int]: return self.dtype_cls(self.length) @classmethod - def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, 
data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[str, None]]: """ - Check that the input is a valid JSON representation of a numpy S dtype. + Check that the input is a valid representation of a numpy S dtype. We expect + something like ``{"name": "|S10", "object_codec_id": None}`` """ - # match |S1, |S2, etc - return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None + return ( + check_dtype_spec_v2(data) + and isinstance(data["name"], str) + and re.match(r"^\|S\d+$", data["name"]) is not None + and data["object_codec_id"] is None + ) @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[NullTerminatedBytesJSONV3]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[NullTerminatedBytesJSONV3]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -62,28 +72,31 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[NullTerminatedBytesJSONV3]: ) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + def _from_json_v2(cls, data: DTypeJSON) -> Self: if cls._check_json_v2(data): - return cls(length=int(data[2:])) + name = data["name"] + return cls(length=int(name[2:])) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string like '|S1', '|S2', etc" raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls(length=data["configuration"]["length_bytes"]) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> str: ... + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[str, None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> NullTerminatedBytesJSONV3: ... 
- def to_json(self, zarr_format: ZarrFormat) -> str | NullTerminatedBytesJSONV3: + def to_json( + self, zarr_format: ZarrFormat + ) -> DTypeConfig_V2[str, None] | NullTerminatedBytesJSONV3: if zarr_format == 2: - return self.to_native_dtype().str + return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: v3_unstable_dtype_warning(self) return { @@ -179,12 +192,20 @@ def to_native_dtype(self) -> np.dtypes.VoidDType[int]: return cast("np.dtypes.VoidDType[int]", np.dtype(f"V{self.length}")) @classmethod - def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: - # Check that the dtype is |V1, |V2, ... - return isinstance(data, str) and re.match(r"^\|V\d+$", data) is not None + def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[str, None]]: + """ + Check that the input is a valid representation of a numpy S dtype. We expect + something like ``{"name": "|V10", "object_codec_id": None}`` + """ + return ( + check_dtype_spec_v2(data) + and isinstance(data["name"], str) + and re.match(r"^\|V\d+$", data["name"]) is not None + and data["object_codec_id"] is None + ) @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[RawBytesJSONV3]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[RawBytesJSONV3]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -194,28 +215,29 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[RawBytesJSONV3]: ) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + def _from_json_v2(cls, data: DTypeJSON) -> Self: if cls._check_json_v2(data): - return cls(length=int(data[2:])) + name = data["name"] + return cls(length=int(name[2:])) msg = f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}, expected a string like '|V1', '|V2', etc" raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls(length=data["configuration"]["length_bytes"]) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> str: ... + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[str, None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> RawBytesJSONV3: ... - def to_json(self, zarr_format: ZarrFormat) -> str | RawBytesJSONV3: + def to_json(self, zarr_format: ZarrFormat) -> DTypeConfig_V2[str, None] | RawBytesJSONV3: if zarr_format == 2: - return self.to_native_dtype().str + return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: v3_unstable_dtype_warning(self) return {"name": self._zarr_v3_name, "configuration": {"length_bytes": self.length}} @@ -257,7 +279,7 @@ def item_size(self) -> int: class VariableLengthBytes(ZDType[np.dtypes.ObjectDType, bytes], HasObjectCodec): dtype_cls = np.dtypes.ObjectDType _zarr_v3_name: ClassVar[Literal["variable_length_bytes"]] = "variable_length_bytes" - object_codec_id = "vlen-bytes" + object_codec_id: ClassVar[Literal["vlen-bytes"]] = "vlen-bytes" @classmethod def from_native_dtype(cls, dtype: TBaseDType) -> Self: @@ -272,41 +294,50 @@ def to_native_dtype(self) -> np.dtypes.ObjectDType: @classmethod def _check_json_v2( - cls, data: JSON, *, object_codec_id: str | None = None - ) -> TypeGuard[Literal["|O"]]: + cls, + data: DTypeJSON, + ) -> TypeGuard[DTypeConfig_V2[Literal["|O"], Literal["vlen-bytes"]]]: """ Check that the input is a valid JSON representation of a numpy O dtype, and that the object codec id is appropriate for variable-length UTF-8 strings. 
""" - return data == "|O" and object_codec_id == cls.object_codec_id + return ( + check_dtype_spec_v2(data) + and data["name"] == "|O" + and data["object_codec_id"] == cls.object_codec_id + ) @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_bytes"]]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["variable_length_bytes"]]: return data == cls._zarr_v3_name @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string '|O' and an object_codec_id of {cls.object_codec_id}" raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... + @overload # type: ignore[override] + def to_json( + self, zarr_format: Literal[2] + ) -> DTypeConfig_V2[Literal["|O"], Literal["vlen-bytes"]]: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_bytes"]: ... 
- def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_bytes"]: + def to_json( + self, zarr_format: ZarrFormat + ) -> DTypeConfig_V2[Literal["|O"], Literal["vlen-bytes"]] | Literal["variable_length_bytes"]: if zarr_format == 2: - return "|O" + return {"name": "|O", "object_codec_id": self.object_codec_id} elif zarr_format == 3: v3_unstable_dtype_warning(self) return self._zarr_v3_name diff --git a/src/zarr/core/dtype/npy/complex.py b/src/zarr/core/dtype/npy/complex.py index bc06af00b5..38e506f1bc 100644 --- a/src/zarr/core/dtype/npy/complex.py +++ b/src/zarr/core/dtype/npy/complex.py @@ -1,15 +1,25 @@ +from __future__ import annotations + from dataclasses import dataclass from typing import ( + TYPE_CHECKING, ClassVar, Literal, Self, TypeGuard, + overload, ) import numpy as np -from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasItemSize +from zarr.core.dtype.common import ( + DataTypeValidationError, + DTypeConfig_V2, + DTypeJSON, + HasEndianness, + HasItemSize, + check_dtype_spec_v2, +) from zarr.core.dtype.npy.common import ( ComplexLike, TComplexDType_co, @@ -25,6 +35,9 @@ ) from zarr.core.dtype.wrapper import TBaseDType, ZDType +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + @dataclass(frozen=True) class BaseComplex(ZDType[TComplexDType_co, TComplexScalar_co], HasEndianness, HasItemSize): @@ -44,33 +57,44 @@ def to_native_dtype(self) -> TComplexDType_co: return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] @classmethod - def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[str, None]]: """ Check that the input is a valid JSON representation of this data type. 
""" - return data in cls._zarr_v2_names + return ( + check_dtype_spec_v2(data) + and data["name"] in cls._zarr_v2_names + and data["object_codec_id"] is None + ) @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[str]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[str]: return data == cls._zarr_v3_name @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): # Going via numpy ensures that we get the endianness correct without # annoying string parsing. - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." raise DataTypeValidationError(msg) - def to_json(self, zarr_format: ZarrFormat) -> str: + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[str, None]: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> str: ... + + def to_json(self, zarr_format: ZarrFormat) -> DTypeConfig_V2[str, None] | str: """ Convert the wrapped data type to a JSON-serializable form. 
@@ -85,7 +109,7 @@ def to_json(self, zarr_format: ZarrFormat) -> str: The JSON-serializable representation of the wrapped data type """ if zarr_format == 2: - return self.to_native_dtype().str + return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover diff --git a/src/zarr/core/dtype/npy/float.py b/src/zarr/core/dtype/npy/float.py index 9271886c20..7b7243993f 100644 --- a/src/zarr/core/dtype/npy/float.py +++ b/src/zarr/core/dtype/npy/float.py @@ -1,14 +1,18 @@ +from __future__ import annotations + from dataclasses import dataclass -from typing import ClassVar, Self, TypeGuard +from typing import TYPE_CHECKING, ClassVar, Literal, Self, TypeGuard, overload import numpy as np -from zarr.core.common import JSON, ZarrFormat from zarr.core.dtype.common import ( DataTypeValidationError, + DTypeConfig_V2, + DTypeJSON, HasEndianness, HasItemSize, ScalarTypeValidationError, + check_dtype_spec_v2, ) from zarr.core.dtype.npy.common import ( FloatLike, @@ -25,6 +29,9 @@ ) from zarr.core.dtype.wrapper import TBaseDType, ZDType +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + @dataclass(frozen=True) class BaseFloat(ZDType[TFloatDType_co, TFloatScalar_co], HasEndianness, HasItemSize): @@ -44,33 +51,44 @@ def to_native_dtype(self) -> TFloatDType_co: return self.dtype_cls().newbyteorder(byte_order) # type: ignore[return-value] @classmethod - def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[str, None]]: """ Check that the input is a valid JSON representation of this data type. 
""" - return data in cls._zarr_v2_names + return ( + check_dtype_spec_v2(data) + and data["name"] in cls._zarr_v2_names + and data["object_codec_id"] is None + ) @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[str]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[str]: return data == cls._zarr_v3_name @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): # Going via numpy ensures that we get the endianness correct without # annoying string parsing. - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." raise DataTypeValidationError(msg) - def to_json(self, zarr_format: ZarrFormat) -> str: + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[str, None]: ... + + @overload + def to_json(self, zarr_format: Literal[3]) -> str: ... + + def to_json(self, zarr_format: ZarrFormat) -> DTypeConfig_V2[str, None] | str: """ Convert the wrapped data type to a JSON-serializable form. 
@@ -85,7 +103,7 @@ def to_json(self, zarr_format: ZarrFormat) -> str: The JSON-serializable representation of the wrapped data type """ if zarr_format == 2: - return self.to_native_dtype().str + return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -175,7 +193,7 @@ def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> float | st class Float16(BaseFloat[np.dtypes.Float16DType, np.float16]): dtype_cls = np.dtypes.Float16DType _zarr_v3_name = "float16" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f2", "f2"], Literal["f2", " int: @@ -186,7 +204,7 @@ def item_size(self) -> int: class Float32(BaseFloat[np.dtypes.Float32DType, np.float32]): dtype_cls = np.dtypes.Float32DType _zarr_v3_name = "float32" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f4", "f4"], Literal["f4", " int: @@ -197,7 +215,7 @@ def item_size(self) -> int: class Float64(BaseFloat[np.dtypes.Float64DType, np.float64]): dtype_cls = np.dtypes.Float64DType _zarr_v3_name = "float64" - _zarr_v2_names: ClassVar[tuple[str, ...]] = (">f8", "f8"], Literal["f8", " int: diff --git a/src/zarr/core/dtype/npy/int.py b/src/zarr/core/dtype/npy/int.py index cddcb26c5e..79d3ce2d47 100644 --- a/src/zarr/core/dtype/npy/int.py +++ b/src/zarr/core/dtype/npy/int.py @@ -1,5 +1,8 @@ +from __future__ import annotations + from dataclasses import dataclass from typing import ( + TYPE_CHECKING, ClassVar, Literal, Self, @@ -12,8 +15,14 @@ import numpy as np -from zarr.core.common import JSON, ZarrFormat -from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasItemSize +from zarr.core.dtype.common import ( + DataTypeValidationError, + DTypeConfig_V2, + DTypeJSON, + HasEndianness, + HasItemSize, + check_dtype_spec_v2, +) from zarr.core.dtype.npy.common import ( check_json_int, endianness_to_numpy_str, @@ -21,6 +30,9 @@ ) from 
zarr.core.dtype.wrapper import TBaseDType, ZDType +if TYPE_CHECKING: + from zarr.core.common import JSON, ZarrFormat + _NumpyIntDType = ( np.dtypes.Int8DType | np.dtypes.Int16DType @@ -45,14 +57,18 @@ class BaseInt(ZDType[TIntDType_co, TIntScalar_co], HasItemSize): _zarr_v2_names: ClassVar[tuple[str, ...]] @classmethod - def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: object) -> TypeGuard[DTypeConfig_V2[str, None]]: """ Check that the input is a valid JSON representation of this data type. """ - return data in cls._zarr_v2_names + return ( + check_dtype_spec_v2(data) + and data["name"] in cls._zarr_v2_names + and data["object_codec_id"] is None + ) @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[str]: + def _check_json_v3(cls, data: object) -> TypeGuard[str]: """ Check that a JSON value is consistent with the zarr v3 spec for this data type. """ @@ -147,26 +163,28 @@ def to_native_dtype(self: Self) -> np.dtypes.Int8DType: return self.dtype_cls() @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): return cls() - msg = f"Invalid JSON representation of Int8. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() - msg = f"Invalid JSON representation of Int8. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + msg = f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal["|i1"]: ... + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal["|i1"], None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["int8"]: ... - def to_json(self, zarr_format: ZarrFormat) -> Literal["int8", "|i1"]: + def to_json( + self, zarr_format: ZarrFormat + ) -> DTypeConfig_V2[Literal["|i1"], None] | Literal["int8"]: """ Convert the wrapped data type to a JSON-serializable form. @@ -181,7 +199,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["int8", "|i1"]: The JSON-serializable representation of the wrapped data type """ if zarr_format == 2: - return self._zarr_v2_names[0] + return {"name": self._zarr_v2_names[0], "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -212,26 +230,28 @@ def to_native_dtype(self: Self) -> np.dtypes.UInt8DType: return self.dtype_cls() @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): return cls() - msg = f"Invalid JSON representation of UInt8. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v2_names[0]!r}" raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() - msg = f"Invalid JSON representation of UInt8. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + msg = f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal["|u1"]: ... + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[Literal["|u1"], None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["uint8"]: ... - def to_json(self, zarr_format: ZarrFormat) -> Literal["uint8", "|u1"]: + def to_json( + self, zarr_format: ZarrFormat + ) -> DTypeConfig_V2[Literal["|u1"], None] | Literal["uint8"]: """ Convert the wrapped data type to a JSON-serializable form. @@ -246,7 +266,7 @@ def to_json(self, zarr_format: ZarrFormat) -> Literal["uint8", "|u1"]: The JSON-serializable representation of the wrapped data type """ if zarr_format == 2: - return self._zarr_v2_names[0] + return {"name": self._zarr_v2_names[0], "object_codec_id": None} elif zarr_format == 3: return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @@ -275,28 +295,31 @@ def to_native_dtype(self) -> np.dtypes.Int16DType: return self.dtype_cls().newbyteorder(byte_order) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): # Going via numpy ensures that we get the endianness correct without # annoying string parsing. - return cls.from_native_dtype(np.dtype(data)) - msg = f"Invalid JSON representation of Int16. Got {data!r}, expected one of the strings {cls._zarr_v2_names!r}." + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names!r}." 
raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() - msg = f"Invalid JSON representation of Int16. Got {data!r}, expected the string {cls._zarr_v3_name!r}" + msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal[">i2", " DTypeConfig_V2[Literal[">i2", " Literal["int16"]: ... - def to_json(self, zarr_format: ZarrFormat) -> Literal["int16", ">i2", " DTypeConfig_V2[Literal[">i2", " Literal["int16", ">i2", " np.dtypes.UInt16DType: return self.dtype_cls().newbyteorder(byte_order) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): # Going via numpy ensures that we get the endianness correct without # annoying string parsing. - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of UInt16. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of UInt16. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal[">u2", " DTypeConfig_V2[Literal[">u2", " Literal["uint16"]: ... 
- def to_json(self, zarr_format: ZarrFormat) -> Literal["uint16", ">u2", " DTypeConfig_V2[Literal[">u2", " Literal["uint16", ">u2", " np.dtypes.Int32DType: return self.dtype_cls().newbyteorder(byte_order) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): # Going via numpy ensures that we get the endianness correct without # annoying string parsing. - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal[">i4", " DTypeConfig_V2[Literal[">i4", " Literal["int32"]: ... - def to_json(self, zarr_format: ZarrFormat) -> Literal["int32", ">i4", " DTypeConfig_V2[Literal[">i4", " Literal["int32", ">i4", " np.dtypes.UInt32DType: return self.dtype_cls().newbyteorder(byte_order) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): # Going via numpy ensures that we get the endianness correct without # annoying string parsing. - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal[">u4", " DTypeConfig_V2[Literal[">u4", " Literal["uint32"]: ... - def to_json(self, zarr_format: ZarrFormat) -> Literal["uint32", ">u4", " DTypeConfig_V2[Literal[">u4", " Literal["uint32", ">u4", " np.dtypes.Int64DType: return self.dtype_cls().newbyteorder(byte_order) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): # Going via numpy ensures that we get the endianness correct without # annoying string parsing. - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal[">i8", " DTypeConfig_V2[Literal[">i8", " Literal["int64"]: ... 
- def to_json(self, zarr_format: ZarrFormat) -> Literal["int64", ">i8", " DTypeConfig_V2[Literal[">i8", " Literal["int64", ">i8", " np.dtypes.UInt64DType: return self.dtype_cls().newbyteorder(byte_order) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): # Going via numpy ensures that we get the endianness correct without # annoying string parsing. - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected one of the strings {cls._zarr_v2_names}." raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal[">u8", " DTypeConfig_V2[Literal[">u8", " Literal["uint64"]: ... - def to_json(self, zarr_format: ZarrFormat) -> Literal["uint64", ">u8", " DTypeConfig_V2[Literal[">u8", " Literal["uint64", ">u8", " np.dtypes.StrDType[int]: return self.dtype_cls(self.length).newbyteorder(byte_order) @classmethod - def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[str, None]]: """ Check that the input is a valid JSON representation of a numpy U dtype. 
""" - return isinstance(data, str) and re.match(r"^[><]U\d+$", data) is not None + return ( + check_dtype_spec_v2(data) + and isinstance(data["name"], str) + and re.match(r"^[><]U\d+$", data["name"]) is not None + and data["object_codec_id"] is None + ) @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[FixedLengthUTF32JSONV3]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[FixedLengthUTF32JSONV3]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -95,15 +103,17 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[FixedLengthUTF32JSONV3]: and isinstance(data["configuration"]["length_bytes"], int) ) - @overload - def to_json(self, zarr_format: Literal[2]) -> str: ... + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[str, None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> FixedLengthUTF32JSONV3: ... - def to_json(self, zarr_format: ZarrFormat) -> str | FixedLengthUTF32JSONV3: + def to_json( + self, zarr_format: ZarrFormat + ) -> DTypeConfig_V2[str, None] | FixedLengthUTF32JSONV3: if zarr_format == 2: - return self.to_native_dtype().str + return {"name": self.to_native_dtype().str, "object_codec_id": None} elif zarr_format == 3: v3_unstable_dtype_warning(self) return { @@ -113,16 +123,17 @@ def to_json(self, zarr_format: ZarrFormat) -> str | FixedLengthUTF32JSONV3: raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): # Construct the numpy dtype instead of string parsing. - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) raise DataTypeValidationError( f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}, expected a string representation of a numpy U dtype." ) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." @@ -197,21 +208,26 @@ def from_native_dtype(cls, dtype: TBaseDType) -> Self: @classmethod def _check_json_v2( - cls, data: JSON, *, object_codec_id: str | None = None - ) -> TypeGuard[Literal["|O"]]: + cls, + data: DTypeJSON, + ) -> TypeGuard[DTypeConfig_V2[Literal["|O"], Literal["vlen-utf8"]]]: """ Check that the input is a valid JSON representation of a numpy O dtype, and that the object codec id is appropriate for variable-length UTF-8 strings. """ - return data == "|O" and object_codec_id == cls.object_codec_id + return ( + check_dtype_spec_v2(data) + and data["name"] == "|O" + and data["object_codec_id"] == cls.object_codec_id + ) @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[Literal["variable_length_utf8"]]: return data == cls._zarr_v3_name @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: - if cls._check_json_v2(data, object_codec_id=object_codec_id): + def _from_json_v2(cls, data: DTypeJSON) -> Self: + if cls._check_json_v2(data): return cls() msg = ( f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected the string '|O'" @@ -219,35 +235,29 @@ def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): return cls() msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected {cls._zarr_v3_name}." 
raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> Literal["|O"]: ... + @overload # type: ignore[override] + def to_json( + self, zarr_format: Literal[2] + ) -> DTypeConfig_V2[Literal["|O"], Literal["vlen-utf8"]]: ... @overload def to_json(self, zarr_format: Literal[3]) -> Literal["variable_length_utf8"]: ... - def to_json(self, zarr_format: ZarrFormat) -> Literal["|O", "variable_length_utf8"]: + def to_json( + self, zarr_format: ZarrFormat + ) -> DTypeConfig_V2[Literal["|O"], Literal["vlen-utf8"]] | Literal["variable_length_utf8"]: if zarr_format == 2: - # Note: unlike many other numpy data types, we don't serialize the .str attribute - # of the data type to JSON. This is because Zarr was using `|O` for strings before the - # numpy variable length string data type existed, and we want to be consistent with - # that practice - return "|O" + return {"name": "|O", "object_codec_id": self.object_codec_id} elif zarr_format == 3: v3_unstable_dtype_warning(self) return self._zarr_v3_name raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover - @classmethod - def _from_json_unchecked( - cls, data: DTypeJSON_V2 | DTypeJSON_V3, *, zarr_format: ZarrFormat - ) -> Self: - return cls() - def default_scalar(self) -> str: return "" diff --git a/src/zarr/core/dtype/npy/structured.py b/src/zarr/core/dtype/npy/structured.py index 66dfed87f6..d9e1ff55ae 100644 --- a/src/zarr/core/dtype/npy/structured.py +++ b/src/zarr/core/dtype/npy/structured.py @@ -1,13 +1,19 @@ -from collections.abc import Sequence +from __future__ import annotations + from dataclasses import dataclass -from typing import Literal, Self, TypeGuard, cast, overload +from typing import TYPE_CHECKING, Literal, Self, TypeGuard, cast, overload import numpy as np -from zarr.core.common import JSON, NamedConfig, ZarrFormat from zarr.core.dtype.common import ( DataTypeValidationError, + DTypeConfig_V2, + DTypeJSON, + DTypeSpec_V3, HasItemSize, + 
StructuredName_V2, + check_dtype_spec_v2, + check_structured_dtype_name_v2, v3_unstable_dtype_warning, ) from zarr.core.dtype.npy.common import ( @@ -15,12 +21,16 @@ bytes_to_json, check_json_str, ) -from zarr.core.dtype.wrapper import DTypeJSON_V2, DTypeJSON_V3, TBaseDType, TBaseScalar, ZDType +from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType + +if TYPE_CHECKING: + from collections.abc import Sequence + + from zarr.core.common import JSON, NamedConfig, ZarrFormat StructuredScalarLike = list[object] | tuple[object, ...] | bytes | int -# TODO: tighten this up, get a v3 spec in place, handle endianness, etc. @dataclass(frozen=True, kw_only=True) class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize): dtype_cls = np.dtypes.VoidDType # type: ignore[assignment] @@ -69,24 +79,20 @@ def to_native_dtype(self) -> np.dtypes.VoidDType[int]: @classmethod def _check_json_v2( - cls, data: JSON, *, object_codec_id: str | None = None - ) -> TypeGuard[list[object]]: - # the actual JSON form is recursive and hard to annotate, so we give up and do - # list[object] for now - + cls, + data: DTypeJSON, + ) -> TypeGuard[DTypeConfig_V2[StructuredName_V2, None]]: return ( - not isinstance(data, str) - and isinstance(data, Sequence) - and all( - not isinstance(field, str) and isinstance(field, Sequence) and len(field) == 2 - for field in data - ) + check_dtype_spec_v2(data) + and not isinstance(data["name"], str) + and check_structured_dtype_name_v2(data["name"]) + and data["object_codec_id"] is None ) @classmethod def _check_json_v3( - cls, data: JSON - ) -> TypeGuard[NamedConfig[Literal["structured"], dict[str, Sequence[tuple[str, JSON]]]]]: + cls, data: DTypeJSON + ) -> TypeGuard[NamedConfig[Literal["structured"], dict[str, Sequence[tuple[str, DTypeJSON]]]]]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -96,9 +102,9 @@ def _check_json_v3( ) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str 
| None = None) -> Self: + def _from_json_v2(cls, data: DTypeJSON) -> Self: # avoid circular import - from zarr.core.dtype import get_data_type_from_json_v2 + from zarr.core.dtype import get_data_type_from_json if cls._check_json_v2(data): # structured dtypes are constructed directly from a list of lists @@ -106,46 +112,59 @@ def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self # dtypes from containing object dtypes. return cls( fields=tuple( # type: ignore[misc] - (f_name, get_data_type_from_json_v2(f_dtype, object_codec_id=None)) # type: ignore[has-type] - for f_name, f_dtype in data + ( # type: ignore[misc] + f_name, + get_data_type_from_json( + {"name": f_dtype, "object_codec_id": None}, zarr_format=2 + ), + ) + for f_name, f_dtype in data["name"] ) ) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON array of arrays" raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: # avoid circular import - from zarr.core.dtype import get_data_type_from_json_v3 + from zarr.core.dtype import get_data_type_from_json if cls._check_json_v3(data): config = data["configuration"] meta_fields = config["fields"] return cls( fields=tuple( - (f_name, get_data_type_from_json_v3(f_dtype)) for f_name, f_dtype in meta_fields + (f_name, get_data_type_from_json(f_dtype, zarr_format=3)) + for f_name, f_dtype in meta_fields ) ) msg = f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a JSON object with the key {cls._zarr_v3_name!r}" raise DataTypeValidationError(msg) - @overload - def to_json(self, zarr_format: Literal[2]) -> DTypeJSON_V2: ... + @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[StructuredName_V2, None]: ... @overload - def to_json(self, zarr_format: Literal[3]) -> DTypeJSON_V3: ... + def to_json(self, zarr_format: Literal[3]) -> DTypeSpec_V3: ... 
- def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V3 | DTypeJSON_V2: - fields = [ - (f_name, f_dtype.to_json(zarr_format=zarr_format)) for f_name, f_dtype in self.fields - ] + def to_json( + self, zarr_format: ZarrFormat + ) -> DTypeConfig_V2[StructuredName_V2, None] | DTypeSpec_V3: if zarr_format == 2: - return fields + fields = [ + [f_name, f_dtype.to_json(zarr_format=zarr_format)["name"]] + for f_name, f_dtype in self.fields + ] + return {"name": fields, "object_codec_id": None} elif zarr_format == 3: v3_unstable_dtype_warning(self) + fields = [ + [f_name, f_dtype.to_json(zarr_format=zarr_format)] # type: ignore[list-item] + for f_name, f_dtype in self.fields + ] base_dict = {"name": self._zarr_v3_name} base_dict["configuration"] = {"fields": fields} # type: ignore[assignment] - return cast("DTypeJSON_V3", base_dict) + return cast("DTypeSpec_V3", base_dict) raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover def _check_scalar(self, data: object) -> TypeGuard[StructuredScalarLike]: diff --git a/src/zarr/core/dtype/npy/time.py b/src/zarr/core/dtype/npy/time.py index a99703dd3e..1f9080475c 100644 --- a/src/zarr/core/dtype/npy/time.py +++ b/src/zarr/core/dtype/npy/time.py @@ -18,8 +18,16 @@ import numpy as np from zarr.core.common import NamedConfig -from zarr.core.dtype.common import DataTypeValidationError, HasEndianness, HasItemSize +from zarr.core.dtype.common import ( + DataTypeValidationError, + DTypeConfig_V2, + DTypeJSON, + HasEndianness, + HasItemSize, + check_dtype_spec_v2, +) from zarr.core.dtype.npy.common import ( + DATETIME_UNIT, DateTimeUnit, check_json_int, endianness_to_numpy_str, @@ -140,14 +148,17 @@ def to_native_dtype(self) -> BaseTimeDType_co: dtype_string = f"{self._numpy_name}[{self.scale_factor}{self.unit}]" return np.dtype(dtype_string).newbyteorder(endianness_to_numpy_str(self.endianness)) # type: ignore[return-value] - @overload - def to_json(self, zarr_format: Literal[2]) -> str: ... 
+ @overload # type: ignore[override] + def to_json(self, zarr_format: Literal[2]) -> DTypeConfig_V2[str, None]: ... @overload def to_json(self, zarr_format: Literal[3]) -> DateTime64JSONV3 | TimeDelta64JSONV3: ... - def to_json(self, zarr_format: ZarrFormat) -> str | DateTime64JSONV3 | TimeDelta64JSONV3: + def to_json( + self, zarr_format: ZarrFormat + ) -> DTypeConfig_V2[str, None] | DateTime64JSONV3 | TimeDelta64JSONV3: if zarr_format == 2: - return cast("str", self.to_native_dtype().str) + name = self.to_native_dtype().str + return {"name": name, "object_codec_id": None} elif zarr_format == 3: return cast( "DateTime64JSONV3 | TimeDelta64JSONV3", @@ -185,22 +196,25 @@ class TimeDelta64(TimeDTypeBase[np.dtypes.TimeDelta64DType, np.timedelta64], Has unit: DateTimeUnit = "generic" @classmethod - def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: + def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[str, None]]: + if not check_dtype_spec_v2(data): + return False + name = data["name"] # match m[M], etc # consider making this a standalone function - if not isinstance(data, str): + if not isinstance(name, str): return False - if not data.startswith(cls._zarr_v2_names): + if not name.startswith(cls._zarr_v2_names): return False - if len(data) == 3: + if len(name) == 3: # no unit, and # we already checked that this string is either m8 return True else: - return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" + return name[4:-1].endswith(DATETIME_UNIT) and name[-1] == "]" @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[DateTime64JSONV3]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -210,9 +224,10 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: ) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + def 
_from_json_v2(cls, data: DTypeJSON) -> Self: if cls._check_json_v2(data): - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) msg = ( f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string " f"representation of an instance of {cls.dtype_cls}" # type: ignore[has-type] @@ -220,7 +235,7 @@ def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): unit = data["configuration"]["unit"] scale_factor = data["configuration"]["scale_factor"] @@ -266,7 +281,28 @@ class DateTime64(TimeDTypeBase[np.dtypes.DateTime64DType, np.datetime64], HasEnd scale_factor: int = 1 @classmethod - def _check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: + def _check_json_v2(cls, data: DTypeJSON) -> TypeGuard[DTypeConfig_V2[str, None]]: + """ + Check that JSON input is a string representation of a NumPy datetime64 data type, like "M8[10s]". This function can be used as a type guard to narrow the type of unknown JSON + input. 
+ """ + if not check_dtype_spec_v2(data): + return False + name = data["name"] + if not isinstance(name, str): + return False + if not name.startswith(cls._zarr_v2_names): + return False + if len(name) == 3: + # no unit, and + # we already checked that this string is either M8 + return True + else: + return name[4:-1].endswith(DATETIME_UNIT) and name[-1] == "]" + + @classmethod + def _check_json_v3(cls, data: DTypeJSON) -> TypeGuard[DateTime64JSONV3]: return ( isinstance(data, dict) and set(data.keys()) == {"name", "configuration"} @@ -276,9 +312,10 @@ def _check_json_v3(cls, data: JSON) -> TypeGuard[DateTime64JSONV3]: ) @classmethod - def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self: + def _from_json_v2(cls, data: DTypeJSON) -> Self: if cls._check_json_v2(data): - return cls.from_native_dtype(np.dtype(data)) + name = data["name"] + return cls.from_native_dtype(np.dtype(name)) msg = ( f"Invalid JSON representation of {cls.__name__}. Got {data!r}, expected a string " f"representation of an instance of {cls.dtype_cls}" # type: ignore[has-type] @@ -286,7 +323,7 @@ def from_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> Self raise DataTypeValidationError(msg) @classmethod - def from_json_v3(cls, data: JSON) -> Self: + def _from_json_v3(cls, data: DTypeJSON) -> Self: if cls._check_json_v3(data): unit = data["configuration"]["unit"] scale_factor = data["configuration"]["scale_factor"] @@ -320,21 +357,3 @@ def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> np.datetim if check_json_time(data): return self._cast_scalar_unchecked(data) raise TypeError(f"Invalid type: {data}. Expected an integer.") # pragma: no cover - - @classmethod - def _check_json_v2(cls, data: JSON, *, object_codec_id: str | None = None) -> TypeGuard[str]: - """ - Check that JSON input is a string representation of a NumPy datetime64 data type, like "M8[10s]". 
This function can be used as a type guard to narrow the type of unknown JSON - input. - """ - if not isinstance(data, str): - return False - if not data.startswith(cls._zarr_v2_names): - return False - if len(data) == 3: - # no unit, and - # we already checked that this string is either M8 - return True - else: - return data[4:-1].endswith(get_args(DateTimeUnit)) and data[-1] == "]" diff --git a/src/zarr/core/dtype/registry.py b/src/zarr/core/dtype/registry.py index 308bde602c..1d2a97a90a 100644 --- a/src/zarr/core/dtype/registry.py +++ b/src/zarr/core/dtype/registry.py @@ -6,12 +6,15 @@ import numpy as np -from zarr.core.dtype.common import DataTypeValidationError +from zarr.core.dtype.common import ( + DataTypeValidationError, + DTypeJSON, +) if TYPE_CHECKING: from importlib.metadata import EntryPoint - from zarr.core.common import JSON + from zarr.core.common import ZarrFormat from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @@ -74,26 +77,14 @@ def match_dtype(self, dtype: TBaseDType) -> ZDType[TBaseDType, TBaseScalar]: "For more information, see https://github.com/zarr-developers/zarr-python/issues/3117" ) raise ValueError(msg) - raise ValueError(f"No data type wrapper found that matches dtype '{dtype}'") + raise ValueError(f"No Zarr data type found that matches dtype '{dtype!r}'") - def match_json_v2( - self, data: JSON, *, object_codec_id: str | None = None + def match_json( + self, data: DTypeJSON, *, zarr_format: ZarrFormat ) -> ZDType[TBaseDType, TBaseScalar]: - # The dtype field in zarr v2 JSON metadata is not unique across different distinct data types. - # Specifically, multiple distinct data types all use the "|O" data type representation. - # These must be disambiguated by the presence of an "object codec", which is a codec - # like variable-length utf8 encoding for strings. 
for val in self.contents.values(): try: - return val.from_json_v2(data, object_codec_id=object_codec_id) + return val.from_json(data, zarr_format=zarr_format) except DataTypeValidationError: pass - raise ValueError(f"No data type wrapper found that matches {data}") - - def match_json_v3(self, data: JSON) -> ZDType[TBaseDType, TBaseScalar]: - for val in self.contents.values(): - try: - return val.from_json_v3(data) - except DataTypeValidationError: - pass - raise ValueError(f"No data type wrapper found that matches {data}") + raise ValueError(f"No Zarr data type found that matches {data!r}") diff --git a/src/zarr/core/dtype/wrapper.py b/src/zarr/core/dtype/wrapper.py index fa34dc000d..e974712e38 100644 --- a/src/zarr/core/dtype/wrapper.py +++ b/src/zarr/core/dtype/wrapper.py @@ -24,7 +24,6 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import Mapping, Sequence from dataclasses import dataclass from typing import ( TYPE_CHECKING, @@ -41,6 +40,7 @@ if TYPE_CHECKING: from zarr.core.common import JSON, ZarrFormat + from zarr.core.dtype.common import DTypeJSON, DTypeSpec_V2, DTypeSpec_V3 # This the upper bound for the scalar types we support. It's numpy scalars + str, # because the new variable-length string dtype in numpy does not have a corresponding scalar type @@ -55,10 +55,6 @@ TScalar_co = TypeVar("TScalar_co", bound=TBaseScalar, covariant=True) TDType_co = TypeVar("TDType_co", bound=TBaseDType, covariant=True) -# These types should include all JSON-serializable types that can be used to represent a data type. 
-DTypeJSON_V2 = str | Sequence[object] -DTypeJSON_V3 = str | Mapping[str, object] - @dataclass(frozen=True, kw_only=True, slots=True) class ZDType(Generic[TDType_co, TScalar_co], ABC): @@ -140,94 +136,45 @@ def to_native_dtype(self: Self) -> TDType_co: @classmethod @abstractmethod - def _check_json_v2( - cls: type[Self], data: JSON, *, object_codec_id: str | None = None - ) -> TypeGuard[DTypeJSON_V2]: - """ - Check that JSON data matches the Zarr V2 JSON serialization of this ZDType. - - Parameters - ---------- - data : JSON - The JSON representation of the data type. - - object_codec_id : str | None - The string identifier of an object codec, if applicable. Object codecs are specific - numcodecs codecs that zarr-python 2.x used to serialize numpy "Object" scalars. - For example, a dtype field set to ``"|O"`` with an object codec ID of "vlen-utf8" - indicates that the data type is a variable-length string. - - Zarr V3 has no such logic, so this parameter is only used for Zarr V2 compatibility. - - Returns - ------- - Bool - True if the JSON representation matches this data type, False otherwise. - """ - ... + def _from_json_v2(cls: type[Self], data: DTypeJSON) -> Self: ... @classmethod @abstractmethod - def _check_json_v3(cls: type[Self], data: JSON) -> TypeGuard[DTypeJSON_V3]: - """ - Check that JSON data matches the Zarr V3 JSON serialization of this ZDType. - - Parameters - ---------- - data : JSON - The JSON representation of the data type. - - Returns - ------- - Bool - True if the JSON representation matches, False otherwise. - """ - ... + def _from_json_v3(cls: type[Self], data: DTypeJSON) -> Self: ... @classmethod - @abstractmethod - def from_json_v2(cls: type[Self], data: JSON, *, object_codec_id: str | None = None) -> Self: + def from_json(cls: type[Self], data: DTypeJSON, *, zarr_format: ZarrFormat) -> Self: """ - Create an instance of this ZDType from Zarr V2 JSON data. + Create an instance of this ZDType from JSON data. 
Parameters ---------- - data : JSON - The JSON representation of the data type. - - Returns - ------- - Self - The wrapped data type. - """ - ... - - @classmethod - @abstractmethod - def from_json_v3(cls: type[Self], data: JSON) -> Self: - """ - Create an instance of this ZDType from Zarr V3 JSON data. + data : DTypeJSON + The JSON representation of the data type. The type annotation includes + Mapping[str, object] to accommodate typed dictionaries. - Parameters - ---------- - data : JSON - The JSON representation of the data type. + zarr_format : ZarrFormat + The zarr format version. Returns ------- Self The wrapped data type. """ - ... + if zarr_format == 2: + return cls._from_json_v2(data) + if zarr_format == 3: + return cls._from_json_v3(data) + raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover @overload - def to_json(self, zarr_format: Literal[2]) -> DTypeJSON_V2: ... + def to_json(self, zarr_format: Literal[2]) -> DTypeSpec_V2: ... @overload - def to_json(self, zarr_format: Literal[3]) -> DTypeJSON_V3: ... + def to_json(self, zarr_format: Literal[3]) -> DTypeSpec_V3: ... @abstractmethod - def to_json(self, zarr_format: ZarrFormat) -> DTypeJSON_V2 | DTypeJSON_V3: + def to_json(self, zarr_format: ZarrFormat) -> DTypeSpec_V2 | DTypeSpec_V3: """ Serialize this ZDType to JSON. 
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index ec1ac42264..3ac75e0418 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -3,14 +3,14 @@ import warnings from collections.abc import Iterable, Sequence from functools import cached_property -from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict +from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast import numcodecs.abc from zarr.abc.metadata import Metadata from zarr.core.chunk_grids import RegularChunkGrid -from zarr.core.dtype import get_data_type_from_json_v2 -from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, TDType_co, TScalar_co, ZDType +from zarr.core.dtype import get_data_type_from_json +from zarr.core.dtype.common import OBJECT_CODEC_IDS, DTypeSpec_V2 if TYPE_CHECKING: from typing import Literal, Self @@ -19,6 +19,13 @@ from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.common import ChunkCoords + from zarr.core.dtype.wrapper import ( + TBaseDType, + TBaseScalar, + TDType_co, + TScalar_co, + ZDType, + ) import json from dataclasses import dataclass, field, fields, replace @@ -28,7 +35,13 @@ from zarr.core.array_spec import ArrayConfig, ArraySpec from zarr.core.chunk_key_encodings import parse_separator -from zarr.core.common import JSON, ZARRAY_JSON, ZATTRS_JSON, MemoryOrder, parse_shapelike +from zarr.core.common import ( + JSON, + ZARRAY_JSON, + ZATTRS_JSON, + MemoryOrder, + parse_shapelike, +) from zarr.core.config import config, parse_indexing_order from zarr.core.metadata.common import parse_attributes @@ -45,9 +58,6 @@ class ArrayV2MetadataDict(TypedDict): # Union of acceptable types for v2 compressors CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None -# These are the ids of the known object codecs for zarr v2. 
-ObjectCodecIds = ("vlen-utf8", "vlen-bytes", "vlen-array", "pickle", "json2", "msgpack2") - @dataclass(frozen=True, kw_only=True) class ArrayV2Metadata(Metadata): @@ -80,9 +90,6 @@ def __init__( """ shape_parsed = parse_shapelike(shape) chunks_parsed = parse_shapelike(chunks) - # TODO: remove this - if not isinstance(dtype, ZDType): - raise TypeError compressor_parsed = parse_compressor(compressor) order_parsed = parse_indexing_order(order) dimension_separator_parsed = parse_separator(dimension_separator) @@ -141,22 +148,22 @@ def from_dict(cls, data: dict[str, Any]) -> ArrayV2Metadata: # To resolve a numpy object dtype array, we need to search for an object codec, # which could be in filters or as a compressor. - # we will use a hard-coded list of object codecs for this search. - object_codec_id: str | None = None - maybe_object_codecs = (data.get("filters"), data.get("compressor")) - for maybe_object_codec in maybe_object_codecs: - if isinstance(maybe_object_codec, Sequence): - for codec in maybe_object_codec: - if isinstance(codec, dict) and codec.get("id") in ObjectCodecIds: - object_codec_id = codec["id"] - break - elif ( - isinstance(maybe_object_codec, dict) - and maybe_object_codec.get("id") in ObjectCodecIds - ): - object_codec_id = maybe_object_codec["id"] - break - dtype = get_data_type_from_json_v2(data["dtype"], object_codec_id=object_codec_id) + # we will reference a hard-coded collection of object codec ids for this search. 
+ + _filters, _compressor = (data.get("filters"), data.get("compressor")) + if _filters is not None: + _filters = cast("tuple[dict[str, JSON], ...]", _filters) + object_codec_id = get_object_codec_id(tuple(_filters) + (_compressor,)) + else: + object_codec_id = get_object_codec_id((_compressor,)) + # we add a layer of indirection here around the dtype attribute of the array metadata + # because we also need to know the object codec id, if any, to resolve the data type + dtype_spec: DTypeSpec_V2 = { + "name": data["dtype"], + "object_codec_id": object_codec_id, + } + dtype = get_data_type_from_json(dtype_spec, zarr_format=2) + _data["dtype"] = dtype fill_value_encoded = _data.get("fill_value") if fill_value_encoded is not None: @@ -216,8 +223,8 @@ def to_dict(self) -> dict[str, JSON]: fill_value = self.dtype.to_json_scalar(self.fill_value, zarr_format=2) zarray_dict["fill_value"] = fill_value - # serialize the dtype after fill value-specific JSON encoding - zarray_dict["dtype"] = self.dtype.to_json(zarr_format=2) # type: ignore[assignment] + # pull the "name" attribute out of the dtype spec returned by self.dtype.to_json + zarray_dict["dtype"] = self.dtype.to_json(zarr_format=2)["name"] return zarray_dict @@ -304,3 +311,21 @@ def parse_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata: ) raise ValueError(msg) return data + + +def get_object_codec_id(maybe_object_codecs: Sequence[JSON]) -> str | None: + """ + Inspect a sequence of codecs / filters for an "object codec", i.e. a codec + that can serialize object arrays to contiguous bytes. Zarr python + maintains a hard-coded set of object codec ids. If any element from the input + has an id that matches one of the hard-coded object codec ids, that id + is returned immediately. 
+ """ + object_codec_id = None + for maybe_object_codec in maybe_object_codecs: + if ( + isinstance(maybe_object_codec, dict) + and maybe_object_codec.get("id") in OBJECT_CODEC_IDS + ): + return cast("str", maybe_object_codec["id"]) + return object_codec_id diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index bd02a67084..84872d3dbd 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -4,11 +4,8 @@ from zarr.abc.metadata import Metadata from zarr.core.buffer.core import default_buffer_prototype -from zarr.core.dtype import ( - VariableLengthUTF8, - ZDType, - get_data_type_from_json_v3, -) +from zarr.core.dtype import VariableLengthUTF8, ZDType, get_data_type_from_json +from zarr.core.dtype.common import check_dtype_spec_v3 if TYPE_CHECKING: from typing import Self @@ -306,7 +303,9 @@ def from_dict(cls, data: dict[str, JSON]) -> Self: _ = parse_node_type_array(_data.pop("node_type")) data_type_json = _data.pop("data_type") - data_type = get_data_type_from_json_v3(data_type_json) + if not check_dtype_spec_v3(data_type_json): + raise ValueError(f"Invalid data_type: {data_type_json!r}") + data_type = get_data_type_from_json(data_type_json, zarr_format=3) # check that the fill value is consistent with the data type try: diff --git a/tests/package_with_entrypoint/__init__.py b/tests/package_with_entrypoint/__init__.py index 834d5654c0..e0d8a52c4d 100644 --- a/tests/package_with_entrypoint/__init__.py +++ b/tests/package_with_entrypoint/__init__.py @@ -1,5 +1,6 @@ -from collections.abc import Iterable -from typing import Any, Literal, Self +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal, Self import numpy as np import numpy.typing as npt @@ -7,11 +8,17 @@ import zarr.core.buffer from zarr.abc.codec import ArrayBytesCodec, CodecInput, CodecPipeline from zarr.codecs import BytesCodec -from zarr.core.array_spec import ArraySpec from zarr.core.buffer import Buffer, NDBuffer -from 
zarr.core.common import JSON, ZarrFormat +from zarr.core.dtype.common import DataTypeValidationError, DTypeJSON, DTypeSpec_V2 from zarr.core.dtype.npy.bool import Bool +if TYPE_CHECKING: + from collections.abc import Iterable + from typing import ClassVar, Literal + + from zarr.core.array_spec import ArraySpec + from zarr.core.common import ZarrFormat + class TestEntrypointCodec(ArrayBytesCodec): is_fixed_size = True @@ -74,13 +81,21 @@ class TestDataType(Bool): This is a "data type" that serializes to "test" """ - _zarr_v3_name = "test" # type: ignore[assignment] + _zarr_v3_name: ClassVar[Literal["test"]] = "test" # type: ignore[assignment] @classmethod - def from_json(cls, data: JSON, zarr_format: Literal[2, 3]) -> Self: - if data == cls._zarr_v3_name: # type: ignore[has-type] + def from_json(cls, data: DTypeJSON, *, zarr_format: Literal[2, 3]) -> Self: + if zarr_format == 2 and data == {"name": cls._zarr_v3_name, "object_codec_id": None}: return cls() - raise ValueError - - def to_json(self, zarr_format: ZarrFormat) -> str: # type: ignore[override] - return self._zarr_v3_name # type: ignore[no-any-return, has-type] + if zarr_format == 3 and data == cls._zarr_v3_name: + return cls() + raise DataTypeValidationError( + f"Invalid JSON representation of {cls.__name__}. 
Got {data!r}" + ) + + def to_json(self, zarr_format: ZarrFormat) -> str | DTypeSpec_V2: # type: ignore[override] + if zarr_format == 2: + return {"name": self._zarr_v3_name, "object_codec_id": None} + if zarr_format == 3: + return self._zarr_v3_name + raise ValueError("zarr_format must be 2 or 3") diff --git a/tests/test_dtype/test_npy/test_bool.py b/tests/test_dtype/test_npy/test_bool.py index 03dc550a9d..010dec2e47 100644 --- a/tests/test_dtype/test_npy/test_bool.py +++ b/tests/test_dtype/test_npy/test_bool.py @@ -2,7 +2,7 @@ import numpy as np -from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams +from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype.npy.bool import Bool @@ -15,7 +15,7 @@ class TestBool(BaseTestZDType): np.dtype(np.float64), np.dtype(np.uint16), ) - valid_json_v2 = (V2JsonTestParams(dtype="|b1"),) + valid_json_v2 = ({"name": "|b1", "object_codec_id": None},) valid_json_v3 = ("bool",) invalid_json_v2 = ( "|b1", diff --git a/tests/test_dtype/test_npy/test_bytes.py b/tests/test_dtype/test_npy/test_bytes.py index 53636891cb..b7c16f573e 100644 --- a/tests/test_dtype/test_npy/test_bytes.py +++ b/tests/test_dtype/test_npy/test_bytes.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams +from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype.common import UnstableSpecificationWarning from zarr.core.dtype.npy.bytes import NullTerminatedBytes, RawBytes, VariableLengthBytes @@ -15,9 +15,9 @@ class TestNullTerminatedBytes(BaseTestZDType): np.dtype("|U10"), ) valid_json_v2 = ( - V2JsonTestParams(dtype="|S0"), - V2JsonTestParams(dtype="|S2"), - V2JsonTestParams(dtype="|S4"), + {"name": "|S0", "object_codec_id": None}, + {"name": "|S2", "object_codec_id": None}, + {"name": "|S4", "object_codec_id": None}, ) valid_json_v3 = ({"name": "null_terminated_bytes", "configuration": {"length_bytes": 10}},) invalid_json_v2 = ( @@ 
-60,7 +60,7 @@ class TestRawBytes(BaseTestZDType): np.dtype(np.float64), np.dtype("|S10"), ) - valid_json_v2 = (V2JsonTestParams(dtype="|V10"),) + valid_json_v2 = ({"name": "|V10", "object_codec_id": None},) valid_json_v3 = ( {"name": "raw_bytes", "configuration": {"length_bytes": 0}}, {"name": "raw_bytes", "configuration": {"length_bytes": 8}}, @@ -106,7 +106,7 @@ class TestVariableLengthBytes(BaseTestZDType): np.dtype(np.float64), np.dtype("|U10"), ) - valid_json_v2 = (V2JsonTestParams(dtype="|O", object_codec_id="vlen-bytes"),) + valid_json_v2 = ({"name": "|O", "object_codec_id": "vlen-bytes"},) valid_json_v3 = ("variable_length_bytes",) invalid_json_v2 = ( "|S", diff --git a/tests/test_dtype/test_npy/test_complex.py b/tests/test_dtype/test_npy/test_complex.py index fd216d8415..b6a1e799eb 100644 --- a/tests/test_dtype/test_npy/test_complex.py +++ b/tests/test_dtype/test_npy/test_complex.py @@ -4,7 +4,7 @@ import numpy as np -from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams +from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype.npy.complex import Complex64, Complex128 @@ -23,7 +23,10 @@ class TestComplex64(_BaseTestFloat): np.dtype(np.float64), np.dtype(np.complex128), ) - valid_json_v2 = (V2JsonTestParams(dtype=">c8"), V2JsonTestParams(dtype="c8", "object_codec_id": None}, + {"name": "c16"), V2JsonTestParams(dtype="c16", "object_codec_id": None}, + {"name": "f2"), V2JsonTestParams(dtype="f2", "object_codec_id": None}, + {"name": "f4"), V2JsonTestParams(dtype="f4", "object_codec_id": None}, + {"name": "f8"), V2JsonTestParams(dtype="f8", "object_codec_id": None}, + {"name": "i1", @@ -46,7 +46,10 @@ class TestInt16(BaseTestZDType): np.dtype(np.uint16), np.dtype(np.float64), ) - valid_json_v2 = (V2JsonTestParams(dtype=">i2"), V2JsonTestParams(dtype="i2", "object_codec_id": None}, + {"name": "i4"), V2JsonTestParams(dtype="i4", "object_codec_id": None}, + {"name": "i8"), V2JsonTestParams(dtype="i8", "object_codec_id": 
None}, + {"name": "u2"), V2JsonTestParams(dtype="u2", "object_codec_id": None}, + {"name": "u4"), V2JsonTestParams(dtype="u4", "object_codec_id": None}, + {"name": "u8"), V2JsonTestParams(dtype="u8", "object_codec_id": None}, + {"name": "U10"), V2JsonTestParams(dtype="U10", "object_codec_id": None}, + {"name": "i4"), ("field2", ">f8")]), - V2JsonTestParams(dtype=[("field1", ">i8"), ("field2", ">i4")]), + {"name": [["field1", ">i4"], ["field2", ">f8"]], "object_codec_id": None}, + {"name": [["field1", ">i8"], ["field2", ">i4"]], "object_codec_id": None}, ) valid_json_v3 = ( { "name": "structured", "configuration": { "fields": [ - ("field1", "int32"), - ("field2", "float64"), + ["field1", "int32"], + ["field2", "float64"], ] }, }, @@ -43,17 +43,17 @@ class TestStructured(BaseTestZDType): "name": "structured", "configuration": { "fields": [ - ( + [ "field1", { "name": "numpy.datetime64", "configuration": {"unit": "s", "scale_factor": 1}, }, - ), - ( + ], + [ "field2", {"name": "fixed_length_utf32", "configuration": {"length_bytes": 32}}, - ), + ], ] }, }, diff --git a/tests/test_dtype/test_npy/test_time.py b/tests/test_dtype/test_npy/test_time.py index 96281434cd..e201be5cf6 100644 --- a/tests/test_dtype/test_npy/test_time.py +++ b/tests/test_dtype/test_npy/test_time.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from tests.test_dtype.test_wrapper import BaseTestZDType, V2JsonTestParams +from tests.test_dtype.test_wrapper import BaseTestZDType from zarr.core.dtype.npy.common import DateTimeUnit from zarr.core.dtype.npy.time import DateTime64, TimeDelta64, datetime_from_int @@ -35,10 +35,10 @@ class TestDateTime64(_TestTimeBase): np.dtype("timedelta64[ns]"), ) valid_json_v2 = ( - V2JsonTestParams(dtype=">M8"), - V2JsonTestParams(dtype=">M8[s]"), - V2JsonTestParams(dtype="M8", "object_codec_id": None}, + {"name": ">M8[s]", "object_codec_id": None}, + {"name": "m8", "object_codec_id": None}, + {"name": ">m8[s]", "object_codec_id": None}, + {"name": " None: """ 
-@dataclass(frozen=True, kw_only=True, slots=True) -class V2JsonTestParams: - dtype: str | dict[str, object] | list[object] - object_codec_id: str | None = None - - class BaseTestZDType: """ A base class for testing ZDType subclasses. This class works in conjunction with the custom @@ -73,10 +66,10 @@ class BaseTestZDType: valid_dtype: ClassVar[tuple[TBaseDType, ...]] = () invalid_dtype: ClassVar[tuple[TBaseDType, ...]] = () - valid_json_v2: ClassVar[tuple[V2JsonTestParams, ...]] = () + valid_json_v2: ClassVar[tuple[DTypeSpec_V2, ...]] = () invalid_json_v2: ClassVar[tuple[str | dict[str, object] | list[object], ...]] = () - valid_json_v3: ClassVar[tuple[str | dict[str, object], ...]] = () + valid_json_v3: ClassVar[tuple[DTypeSpec_V3, ...]] = () invalid_json_v3: ClassVar[tuple[str | dict[str, object], ...]] = () # for testing scalar round-trip serialization, we need a tuple of (data type json, scalar json) @@ -108,16 +101,13 @@ def test_from_dtype_roundtrip(self, valid_dtype: Any) -> None: zdtype = self.test_cls.from_native_dtype(valid_dtype) assert zdtype.to_native_dtype() == valid_dtype - def test_from_json_roundtrip_v2(self, valid_json_v2: V2JsonTestParams) -> None: - zdtype = self.test_cls.from_json_v2( - valid_json_v2.dtype, # type: ignore[arg-type] - object_codec_id=valid_json_v2.object_codec_id, - ) - assert zdtype.to_json(zarr_format=2) == valid_json_v2.dtype + def test_from_json_roundtrip_v2(self, valid_json_v2: DTypeSpec_V2) -> None: + zdtype = self.test_cls.from_json(valid_json_v2, zarr_format=2) + assert zdtype.to_json(zarr_format=2) == valid_json_v2 @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") - def test_from_json_roundtrip_v3(self, valid_json_v3: Any) -> None: - zdtype = self.test_cls.from_json_v3(valid_json_v3) + def test_from_json_roundtrip_v3(self, valid_json_v3: DTypeSpec_V3) -> None: + zdtype = self.test_cls.from_json(valid_json_v3, zarr_format=3) assert zdtype.to_json(zarr_format=3) == valid_json_v3 
def test_scalar_roundtrip_v2(self, scalar_v2_params: tuple[ZDType[Any, Any], Any]) -> None: diff --git a/tests/test_dtype_registry.py b/tests/test_dtype_registry.py index c4225874a4..c7d5f90065 100644 --- a/tests/test_dtype_registry.py +++ b/tests/test_dtype_registry.py @@ -23,10 +23,9 @@ TBaseScalar, ZDType, data_type_registry, - get_data_type_from_json_v3, + get_data_type_from_json, parse_data_type, ) -from zarr.core.dtype.common import HasObjectCodec if TYPE_CHECKING: from collections.abc import Generator @@ -85,14 +84,14 @@ def test_unregistered_dtype(data_type_registry_fixture: DataTypeRegistry) -> Non """ Test that match_dtype raises an error if the dtype is not registered. """ - outside_dtype = "int8" - with pytest.raises( - ValueError, match=f"No data type wrapper found that matches dtype '{outside_dtype}'" - ): - data_type_registry_fixture.match_dtype(np.dtype(outside_dtype)) + outside_dtype_name = "int8" + outside_dtype = np.dtype(outside_dtype_name) + msg = f"No Zarr data type found that matches dtype '{outside_dtype!r}'" + with pytest.raises(ValueError, match=re.escape(msg)): + data_type_registry_fixture.match_dtype(outside_dtype) with pytest.raises(KeyError): - data_type_registry_fixture.get(outside_dtype) + data_type_registry_fixture.get(outside_dtype_name) @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @@ -110,23 +109,12 @@ def test_registered_dtypes_match_dtype(zdtype: ZDType[TBaseDType, TBaseScalar]) def test_registered_dtypes_match_json( zdtype: ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat ) -> None: - if zarr_format == 2: - if isinstance(zdtype, HasObjectCodec): - object_codec_id = zdtype.object_codec_id - else: - object_codec_id = None - assert ( - data_type_registry.match_json_v2( - zdtype.to_json(zarr_format=zarr_format), # type: ignore[arg-type] - object_codec_id=object_codec_id, - ) - == zdtype - ) - else: - skip_object_dtype(zdtype) - assert ( - 
data_type_registry.match_json_v3(zdtype.to_json(zarr_format=zarr_format)) == zdtype # type: ignore[arg-type] + assert ( + data_type_registry.match_json( + zdtype.to_json(zarr_format=zarr_format), zarr_format=zarr_format ) + == zdtype + ) @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @@ -148,14 +136,14 @@ def test_match_dtype_unique( dtype_instance = zdtype.to_native_dtype() - msg = f"No data type wrapper found that matches dtype '{dtype_instance}'" + msg = f"No Zarr data type found that matches dtype '{dtype_instance!r}'" with pytest.raises(ValueError, match=re.escape(msg)): data_type_registry_fixture.match_dtype(dtype_instance) instance_dict = zdtype.to_json(zarr_format=zarr_format) - msg = f"No data type wrapper found that matches {instance_dict}" + msg = f"No Zarr data type found that matches {instance_dict!r}" with pytest.raises(ValueError, match=re.escape(msg)): - data_type_registry_fixture.match_json_v3(instance_dict) # type: ignore[arg-type] + data_type_registry_fixture.match_json(instance_dict, zarr_format=zarr_format) # this is copied from the registry tests -- we should deduplicate @@ -181,7 +169,7 @@ def test_entrypoint_dtype(zarr_format: ZarrFormat) -> None: data_type_registry.lazy_load() instance = TestDataType() dtype_json = instance.to_json(zarr_format=zarr_format) - assert get_data_type_from_json_v3(dtype_json) == instance + assert get_data_type_from_json(dtype_json, zarr_format=zarr_format) == instance data_type_registry.unregister(TestDataType._zarr_v3_name) diff --git a/tests/test_group.py b/tests/test_group.py index 21cbe829a5..60a1fcb9bf 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -23,6 +23,7 @@ from zarr.core._info import GroupInfo from zarr.core.buffer import default_buffer_prototype from zarr.core.config import config as zarr_config +from zarr.core.dtype.common import unpack_dtype_json from zarr.core.dtype.npy.int import UInt8 from zarr.core.group import ( 
ConsolidatedMetadata, @@ -516,7 +517,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat metadata = { "subarray": { "attributes": {}, - "dtype": dtype.to_json(zarr_format=zarr_format), + "dtype": unpack_dtype_json(dtype.to_json(zarr_format=zarr_format)), "fill_value": fill_value, "shape": (1,), "chunks": (1,), @@ -552,7 +553,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat {"configuration": {"endian": "little"}, "name": "bytes"}, {"configuration": {}, "name": "zstd"}, ), - "data_type": dtype.to_json(zarr_format=zarr_format), + "data_type": unpack_dtype_json(dtype.to_json(zarr_format=zarr_format)), "fill_value": fill_value, "node_type": "array", "shape": (1,), From 70da4da15c67d05070aff2d106f20e89e956de03 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 15 Jun 2025 23:19:25 +0200 Subject: [PATCH 130/130] fix dtype doc example --- docs/user-guide/data_types.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/data_types.rst b/docs/user-guide/data_types.rst index 0150e025e3..87c8efc1f5 100644 --- a/docs/user-guide/data_types.rst +++ b/docs/user-guide/data_types.rst @@ -151,7 +151,7 @@ Serialize to JSON for Zarr V2 and V3 >>> json_v2 = int8.to_json(zarr_format=2) >>> json_v2 - '|i1' + {'name': '|i1', 'object_codec_id': None} >>> json_v3 = int8.to_json(zarr_format=3) >>> json_v3 'int8'