|
27 | 27 | _normalize_path,
|
28 | 28 | )
|
29 | 29 | from xarray.backends.locks import _get_scheduler
|
30 |
| -from xarray.backends.zarr import open_zarr |
31 | 30 | from xarray.core import indexing
|
32 | 31 | from xarray.core.combine import (
|
33 | 32 | _infer_concat_order_from_positions,
|
@@ -1522,92 +1521,6 @@ def save_mfdataset(
|
1522 | 1521 | )
|
1523 | 1522 |
|
1524 | 1523 |
|
1525 |
def _auto_detect_region(ds_new, ds_orig, dim):
    """Infer the slice along ``dim`` that ``ds_new`` occupies within ``ds_orig``.

    The coordinate ``ds_orig[dim]`` is mapped to its integer positions, and
    the positions of the labels in ``ds_new[dim]`` are looked up through that
    mapping.

    Parameters
    ----------
    ds_new
        Object holding the data to write; indexed with ``ds_new[dim]``.
        NOTE(review): presumably an xarray Dataset -- confirm against callers.
    ds_orig
        Object describing the existing store; indexed with ``ds_orig[dim]``.
    dim
        Name of the coordinate/dimension whose region is detected.

    Returns
    -------
    slice
        ``slice(first, last + 1)`` where ``first`` and ``last`` are the
        positions in ``ds_orig[dim]`` of the first and last labels of
        ``ds_new[dim]``.

    Raises
    ------
    KeyError
        If the label lookup fails. When the failure message contains
        "not all values found", a more descriptive ``KeyError`` is raised,
        chained to the original one; any other ``KeyError`` propagates
        unchanged.
    ValueError
        If consecutive looked-up positions do not each increase by exactly 1.
    """
    # Create a mapping array of coordinates to indices on the original array
    coord = ds_orig[dim]
    da_map = DataArray(np.arange(coord.size), coords={dim: coord})

    try:
        da_idxs = da_map.sel({dim: ds_new[dim]})
    except KeyError as e:
        if "not all values found" not in str(e):
            # Unrelated lookup failure: let it propagate with its own traceback.
            raise
        # Keep the low-level error attached as the explicit cause.
        raise KeyError(
            f"Not all values of coordinate '{dim}' in the new array were"
            " found in the original store. Writing to a zarr region slice"
            " requires that no dimensions or metadata are changed by the write."
        ) from e

    # Every step between neighbouring positions must be exactly +1; anything
    # else (gaps, repeats, reversed order) cannot be expressed as one slice.
    if (da_idxs.diff(dim) != 1).any():
        raise ValueError(
            f"The auto-detected region of coordinate '{dim}' for writing new data"
            " to the original store had non-contiguous indices. Writing to a zarr"
            " region slice requires that the new data constitute a contiguous subset"
            " of the original store."
        )

    positions = da_idxs.values
    # The stop bound is exclusive, hence the +1 on the last position.
    return slice(positions[0], positions[-1] + 1)
1553 |
| - |
1554 |
| - |
1555 |
def _auto_detect_regions(ds, region, open_kwargs):
    """Resolve every ``"auto"`` entry of ``region`` into a detected slice.

    Parameters
    ----------
    ds
        Object holding the data to write; forwarded to
        ``_auto_detect_region`` for each ``"auto"`` entry.
    region : dict
        Mapping from dimension name to either ``"auto"`` or another value
        (other values are passed through untouched).
    open_kwargs : dict
        Keyword arguments forwarded verbatim to ``open_zarr`` to open the
        existing store.

    Returns
    -------
    dict
        A new mapping with the same keys, in the same order, where each
        ``"auto"`` value has been replaced by the result of
        ``_auto_detect_region``. The ``region`` argument is left unmodified.
    """
    ds_original = open_zarr(**open_kwargs)
    # Build a fresh dict instead of assigning into ``region``: the mapping may
    # be owned by the caller, who should not see "auto" silently overwritten.
    return {
        key: _auto_detect_region(ds, ds_original, key) if val == "auto" else val
        for key, val in region.items()
    }
1561 |
| - |
1562 |
| - |
1563 |
def _validate_and_autodetect_region(ds, region, mode, open_kwargs) -> dict[str, slice]:
    """Validate ``region`` and resolve any ``"auto"`` entries into slices.

    Parameters
    ----------
    ds
        Object to be written; its ``dims`` and ``variables`` are inspected.
    region : dict or "auto"
        Either the string ``"auto"`` (expanded to ``"auto"`` for every
        dimension in ``ds.dims``) or a mapping from dimension name to a
        ``slice`` or ``"auto"``.
    mode
        Write mode; must equal ``"r+"`` whenever any entry is ``"auto"``.
    open_kwargs : dict
        Forwarded to ``_auto_detect_regions`` when auto-detection is needed.

    Returns
    -------
    dict[str, slice]
        The validated region. When no entry is ``"auto"`` this is the mapping
        that was passed in; otherwise it is the mapping produced by
        ``_auto_detect_regions``.

    Raises
    ------
    TypeError
        If ``region`` is not a dict (after expanding ``"auto"``), or if any
        value is not a ``slice`` once auto-detection has run.
    ValueError
        If ``"auto"`` is used with a mode other than ``"r+"``, if a key is not
        in ``ds.dims``, if a slice step is neither 1 nor None, or if some
        variable of ``ds`` shares no dimension with ``region``.
    """
    if region == "auto":
        region = {dim: "auto" for dim in ds.dims}

    if not isinstance(region, dict):
        raise TypeError(f"``region`` must be a dict, got {type(region)}")

    if any(v == "auto" for v in region.values()):
        if mode != "r+":
            raise ValueError(
                f"``mode`` must be 'r+' when using ``region='auto'``, got {mode}"
            )
        # Hand over a copy so the caller's mapping keeps its "auto" markers
        # even if the helper fills in the detected slices in place.
        region = _auto_detect_regions(ds, dict(region), open_kwargs)

    for k, v in region.items():
        if k not in ds.dims:
            raise ValueError(
                "all keys in ``region`` are not in Dataset dimensions, got "
                f"{list(region)} and {list(ds.dims)}"
            )
        if not isinstance(v, slice):
            raise TypeError(
                "all values in ``region`` must be slice objects, got "
                f"region={region}"
            )
        if v.step not in {1, None}:
            raise ValueError(
                "step on all slices in ``region`` must be 1 or None, got "
                f"region={region}"
            )

    # Computed once: the set of region dimensions is the same for every variable.
    region_dims = set(region)
    non_matching_vars = [
        name for name, var in ds.variables.items() if region_dims.isdisjoint(var.dims)
    ]
    if non_matching_vars:
        raise ValueError(
            "when setting `region` explicitly in to_zarr(), all "
            "variables in the dataset to write must have at least "
            "one dimension in common with the region's dimensions "
            f"{list(region.keys())}, but that is not "
            "the case for some variables here. To drop these variables "
            "from this dataset before exporting to zarr, write: "
            f".drop_vars({non_matching_vars!r})"
        )

    return region
1609 |
| - |
1610 |
| - |
1611 | 1524 | def _validate_datatypes_for_zarr_append(zstore, dataset):
|
1612 | 1525 | """If variable exists in the store, confirm dtype of the data to append is compatible with
|
1613 | 1526 | existing dtype.
|
@@ -1768,24 +1681,6 @@ def to_zarr(
|
1768 | 1681 | # validate Dataset keys, DataArray names
|
1769 | 1682 | _validate_dataset_names(dataset)
|
1770 | 1683 |
|
1771 |
| - if region is not None: |
1772 |
| - open_kwargs = dict( |
1773 |
| - store=store, |
1774 |
| - synchronizer=synchronizer, |
1775 |
| - group=group, |
1776 |
| - consolidated=consolidated, |
1777 |
| - storage_options=storage_options, |
1778 |
| - zarr_version=zarr_version, |
1779 |
| - ) |
1780 |
| - region = _validate_and_autodetect_region(dataset, region, mode, open_kwargs) |
1781 |
| - # can't modify indexes with region writes |
1782 |
| - dataset = dataset.drop_vars(dataset.indexes) |
1783 |
| - if append_dim is not None and append_dim in region: |
1784 |
| - raise ValueError( |
1785 |
| - f"cannot list the same dimension in both ``append_dim`` and " |
1786 |
| - f"``region`` with to_zarr(), got {append_dim} in both" |
1787 |
| - ) |
1788 |
| - |
1789 | 1684 | if zarr_version is None:
|
1790 | 1685 | # default to 2 if store doesn't specify its version (e.g. a path)
|
1791 | 1686 | zarr_version = int(getattr(store, "_store_version", 2))
|
@@ -1815,6 +1710,16 @@ def to_zarr(
|
1815 | 1710 | write_empty=write_empty_chunks,
|
1816 | 1711 | )
|
1817 | 1712 |
|
| 1713 | + if region is not None: |
| 1714 | + zstore._validate_and_autodetect_region(dataset) |
| 1715 | + # can't modify indexes with region writes |
| 1716 | + dataset = dataset.drop_vars(dataset.indexes) |
| 1717 | + if append_dim is not None and append_dim in region: |
| 1718 | + raise ValueError( |
| 1719 | + f"cannot list the same dimension in both ``append_dim`` and " |
| 1720 | + f"``region`` with to_zarr(), got {append_dim} in both" |
| 1721 | + ) |
| 1722 | + |
1818 | 1723 | if mode in ["a", "a-", "r+"]:
|
1819 | 1724 | _validate_datatypes_for_zarr_append(zstore, dataset)
|
1820 | 1725 | if append_dim is not None:
|
|
0 commit comments