zarrs · flying-sheep · May 22, 2025 · Feb 18, 2025 · May 12, 2025 · May 12, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -21,7 +21,7 @@ repos:
         language: system
         pass_filenames: false
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.7.2
+    rev: v0.11.9
     hooks:
     -   id: ruff
         args: ["--fix"]

diff --git a/Cargo.toml b/Cargo.toml
@@ -10,7 +10,7 @@ crate-type = ["cdylib", "rlib"]
 
 [dependencies]
 pyo3 = { version = "0.23.2", features = ["abi3-py311"] }
-zarrs = { version = "0.19.0", features = ["async"] }
+zarrs = { version = "0.20.0", features = ["async", "zlib", "pcodec", "bz2"] }
 rayon_iter_concurrent_limit = "0.2.0"
 rayon = "1.10.0"
 # fix for https://stackoverflow.com/questions/76593417/package-openssl-was-not-found-in-the-pkg-config-search-path
@@ -19,10 +19,9 @@ numpy = "0.23.0"
 unsafe_cell_slice = "0.2.0"
 serde_json = "1.0.128"
 pyo3-stub-gen = "0.7.0"
-opendal = { version = "0.51.0", features = ["services-http"] }
+opendal = { version = "0.53.0", features = ["services-http"] }
 tokio = { version = "1.41.1", features = ["rt-multi-thread"] }
-zarrs_opendal = "0.5.0"
-zarrs_metadata = "0.3.7" # require recent zarr-python compatibility fixes (remove with zarrs 0.20)
+zarrs_opendal = "0.7.2"
 itertools = "0.9.0"
 
 [profile.release]

diff --git a/pyproject.toml b/pyproject.toml
@@ -100,7 +100,7 @@ select = [
     "W",       # Warning detected by Pycodestyle
     "UP",      # pyupgrade
     "I",       # isort
-    "TCH",     # manage type checking blocks
+    "TC",      # manage type checking blocks
     "TID251",  # Banned imports
     "ICN",     # Follow import conventions
     "PTH",     # Pathlib instead of os.path

diff --git a/src/chunk_item.rs b/src/chunk_item.rs
@@ -10,7 +10,7 @@ use pyo3_stub_gen::derive::{gen_stub_pyclass, gen_stub_pymethods};
 use zarrs::{
     array::{ChunkRepresentation, DataType, FillValue},
     array_subset::ArraySubset,
-    metadata::v3::{array::data_type::DataTypeMetadataV3, MetadataV3},
+    metadata::v3::MetadataV3,
     storage::StoreKey,
 };
 
@@ -146,9 +146,11 @@ fn get_chunk_representation(
     fill_value: Vec<u8>,
 ) -> PyResult<ChunkRepresentation> {
     // Get the chunk representation
-    let data_type =
-        DataType::from_metadata(&DataTypeMetadataV3::from_metadata(&MetadataV3::new(dtype)))
-            .map_py_err::<PyRuntimeError>()?;
+    let data_type = DataType::from_metadata(
+        &MetadataV3::new(dtype),
+        zarrs::config::global_config().data_type_aliases_v3(),
+    )
+    .map_py_err::<PyRuntimeError>()?;
     let chunk_shape = chunk_shape
         .into_iter()
         .map(|x| NonZeroU64::new(x).expect("chunk shapes should always be non-zero"))

diff --git a/src/lib.rs b/src/lib.rs
@@ -22,7 +22,8 @@ use zarrs::array::codec::{
     ArrayPartialDecoderTraits, ArrayToBytesCodecTraits, CodecOptions, CodecOptionsBuilder,
 };
 use zarrs::array::{
-    copy_fill_value_into, update_array_bytes, ArrayBytes, ArraySize, CodecChain, FillValue,
+    copy_fill_value_into, update_array_bytes, ArrayBytes, ArrayBytesFixedDisjointView, ArraySize,
+    CodecChain, FillValue,
 };
 use zarrs::array_subset::ArraySubset;
 use zarrs::metadata::v3::MetadataV3;
@@ -114,7 +115,7 @@ impl CodecPipelineImpl {
         codec_options: &CodecOptions,
     ) -> PyResult<()> {
         let array_shape = item.representation().shape_u64();
-        if !chunk_subset.inbounds(&array_shape) {
+        if !chunk_subset.inbounds_shape(&array_shape) {
             return Err(PyErr::new::<PyValueError, _>(format!(
                 "chunk subset ({chunk_subset}) is out of bounds for array shape ({array_shape:?})"
             )));
@@ -134,20 +135,14 @@ impl CodecPipelineImpl {
             let chunk_bytes_old = self.retrieve_chunk_bytes(item, codec_chain, codec_options)?;
 
             // Update the chunk
-            let chunk_bytes_new = unsafe {
-                // SAFETY:
-                // - chunk_bytes_old is compatible with the chunk shape and data type size (validated on decoding)
-                // - chunk_subset is compatible with chunk_subset_bytes and the data type size (validated above)
-                // - chunk_subset is within the bounds of the chunk shape (validated above)
-                // - output bytes and output subset bytes are compatible (same data type)
-                update_array_bytes(
-                    chunk_bytes_old,
-                    &array_shape,
-                    chunk_subset,
-                    &chunk_subset_bytes,
-                    data_type_size,
-                )
-            };
+            let chunk_bytes_new = update_array_bytes(
+                chunk_bytes_old,
+                &array_shape,
+                chunk_subset,
+                &chunk_subset_bytes,
+                data_type_size,
+            )
+            .map_py_err::<PyRuntimeError>()?;
 
             // Store the updated chunk
             self.store_chunk_bytes(item, codec_chain, chunk_bytes_new, codec_options)
@@ -279,8 +274,8 @@ impl CodecPipelineImpl {
             .unique_by(|item| item.key())
             .collect::<Vec<_>>();
         let mut partial_decoder_cache: HashMap<StoreKey, Arc<dyn ArrayPartialDecoderTraits>> =
-            HashMap::new().into();
-        if partial_chunk_descriptions.len() > 0 {
+            HashMap::new();
+        if !partial_chunk_descriptions.is_empty() {
             let key_decoder_pairs = iter_concurrent_limit!(
                 chunk_concurrent_limit,
                 partial_chunk_descriptions,
@@ -308,59 +303,61 @@ impl CodecPipelineImpl {
             // For variable length data types, need a codepath with non `_into` methods.
             // Collect all the subsets and copy into value on the Python side?
             let update_chunk_subset = |item: chunk_item::WithSubset| {
+                let chunk_item::WithSubset {
+                    item,
+                    subset,
+                    chunk_subset,
+                } = item;
+                let mut output_view = unsafe {
+                    // SAFETY: chunks represent disjoint array subsets.
+                    // TODO: Is that correct? Can it be guaranteed when this function is
+                    //       called from Python with arbitrary arguments?
+                    ArrayBytesFixedDisjointView::new(
+                        output,
+                        // TODO: Why is the data type taken from `item`? Shouldn't it be derived from `output`?
+                        item.representation()
+                            .data_type()
+                            .fixed_size()
+                            .ok_or("variable length data type not supported")
+                            .map_py_err::<PyTypeError>()?,
+                        &output_shape,
+                        subset,
+                    )
+                    .map_py_err::<PyRuntimeError>()?
+                };
+
                 // See zarrs::array::Array::retrieve_chunk_subset_into
-                if is_whole_chunk(&item) {
+                if chunk_subset.start().iter().all(|&o| o == 0)
+                    && chunk_subset.shape() == item.representation().shape_u64()
+                {
                     // See zarrs::array::Array::retrieve_chunk_into
                     if let Some(chunk_encoded) = self.stores.get(&item)? {
                         // Decode the encoded data into the output buffer
                         let chunk_encoded: Vec<u8> = chunk_encoded.into();
-                        unsafe {
-                            // SAFETY:
-                            // - output is an array with output_shape elements of the item.representation data type,
-                            // - item.subset is within the bounds of output_shape.
-                            self.codec_chain.decode_into(
-                                Cow::Owned(chunk_encoded),
-                                item.representation(),
-                                &output,
-                                &output_shape,
-                                &item.subset,
-                                &codec_options,
-                            )
-                        }
+                        self.codec_chain.decode_into(
+                            Cow::Owned(chunk_encoded),
+                            item.representation(),
+                            &mut output_view,
+                            &codec_options,
+                        )
                     } else {
                         // The chunk is missing, write the fill value
-                        unsafe {
-                            // SAFETY:
-                            // - data type and fill value are confirmed to be compatible when the ChunkRepresentation is created,
-                            // - output is an array with output_shape elements of the item.representation data type,
-                            // - item.subset is within the bounds of output_shape.
-                            copy_fill_value_into(
-                                item.representation().data_type(),
-                                item.representation().fill_value(),
-                                &output,
-                                &output_shape,
-                                &item.subset,
-                            )
-                        }
+                        copy_fill_value_into(
+                            item.representation().data_type(),
+                            item.representation().fill_value(),
+                            &mut output_view,
+                        )
                     }
                 } else {
                     let key = item.key();
                     let partial_decoder = partial_decoder_cache.get(key).ok_or_else(|| {
                         PyRuntimeError::new_err(format!("Partial decoder not found for key: {key}"))
                     })?;
-                    unsafe {
-                        // SAFETY:
-                        // - output is an array with output_shape elements of the item.representation data type,
-                        // - item.subset is within the bounds of output_shape.
-                        // - item.chunk_subset has the same number of elements as item.subset.
-                        partial_decoder.partial_decode_into(
-                            &item.chunk_subset,
-                            &output,
-                            &output_shape,
-                            &item.subset,
-                            &codec_options,
-                        )
-                    }
+                    partial_decoder.partial_decode_into(
+                        &chunk_subset,
+                        &mut output_view,
+                        &codec_options,
+                    )
                 }
                 .map_py_err::<PyValueError>()
             };

diff --git a/src/metadata_v2.rs b/src/metadata_v2.rs
@@ -1,7 +1,7 @@
 use pyo3::{exceptions::PyRuntimeError, pyfunction, PyErr, PyResult};
 use zarrs::metadata::{
-    v2::{array::ArrayMetadataV2Order, MetadataV2},
-    v3::array::data_type::DataTypeMetadataV3,
+    v2::{ArrayMetadataV2Order, MetadataV2},
+    v3::MetadataV3,
 };
 
 #[pyfunction]
@@ -35,13 +35,15 @@ pub fn codec_metadata_v2_to_v3(
 
     // FIXME: The array order, dimensionality, data type, and endianness are needed to exhaustively support all Zarr V2 data that zarrs can handle.
     // However, CodecPipeline.from_codecs does not supply this information, and CodecPipeline.evolve_from_array_spec is seemingly never called.
-    let metadata = zarrs::metadata::v2_to_v3::codec_metadata_v2_to_v3(
+    let metadata = zarrs::metadata_ext::v2_to_v3::codec_metadata_v2_to_v3(
         ArrayMetadataV2Order::C,
-        0,                         // unused with C order
-        &DataTypeMetadataV3::Bool, // FIXME
+        0,                        // unused with C order
+        &MetadataV3::new("bool"), // FIXME
         None,
         &filters,
         &compressor,
+        zarrs::config::global_config().codec_aliases_v2(),
+        zarrs::config::global_config().codec_aliases_v3(),
     )
     .map_err(|err| {
         // TODO: More informative error messages from zarrs for ArrayMetadataV2ToV3ConversionError