|
16 | 16 | cftime_range,
|
17 | 17 | coding,
|
18 | 18 | conventions,
|
| 19 | + date_range, |
19 | 20 | decode_cf,
|
20 | 21 | )
|
21 | 22 | from xarray.coding.times import (
|
|
30 | 31 | from xarray.coding.variables import SerializationWarning
|
31 | 32 | from xarray.conventions import _update_bounds_attributes, cf_encoder
|
32 | 33 | from xarray.core.common import contains_cftime_datetimes
|
| 34 | +from xarray.core.pycompat import is_duck_dask_array |
33 | 35 | from xarray.testing import assert_equal, assert_identical
|
34 | 36 | from xarray.tests import (
|
35 | 37 | FirstElementAccessibleArray,
|
@@ -1387,3 +1389,139 @@ def test_roundtrip_float_times() -> None:
|
1387 | 1389 | assert_identical(var, decoded_var)
|
1388 | 1390 | assert decoded_var.encoding["units"] == units
|
1389 | 1391 | assert decoded_var.encoding["_FillValue"] == fill_value
|
| 1392 | + |
| 1393 | + |
# Parametrizations for test_encode_cf_datetime_datetime64_via_dask, keyed by the
# pytest id. Each value is a (freq, units, dtype) tuple: the frequency used to
# build the times, followed by the units and dtype handed to encode_cf_datetime.
# The last case passes None for both units and dtype.
ENCODE_DATETIME64_VIA_DASK_TESTS = {
    "pandas-encoding-with-prescribed-units-and-dtype": (
        "D",
        "days since 1700-01-01",
        np.dtype("int32"),
    ),
    "mixed-cftime-pandas-encoding-with-prescribed-units-and-dtype": (
        "252YS",
        "days since 1700-01-01",
        np.dtype("int32"),
    ),
    "pandas-encoding-with-default-units-and-dtype": ("252YS", None, None),
}
| 1407 | + |
| 1408 | + |
@requires_dask
@pytest.mark.parametrize(
    ("freq", "units", "dtype"),
    ENCODE_DATETIME64_VIA_DASK_TESTS.values(),
    ids=ENCODE_DATETIME64_VIA_DASK_TESTS.keys(),
)
def test_encode_cf_datetime_datetime64_via_dask(freq, units, dtype) -> None:
    """Encode a chunked datetime64 array and check the result round-trips.

    The times are three pandas timestamps starting in 1700, wrapped in a dask
    array with one element per chunk. The test checks that the encoded values
    are still a dask array with the same chunks, that the returned units,
    dtype and calendar are as expected, and that decoding gives back the
    original times.
    """
    import dask.array

    times = pd.date_range(start="1700", freq=freq, periods=3)
    times = dask.array.from_array(times, chunks=1)
    encoded_times, encoding_units, encoding_calendar = encode_cf_datetime(
        times, units, None, dtype
    )

    # Encoding must hand back a dask array chunked like the input.
    assert is_duck_dask_array(encoded_times)
    assert encoded_times.chunks == times.chunks

    if units is not None and dtype is not None:
        # Prescribed units and dtype are expected to be used as given.
        assert encoding_units == units
        assert encoded_times.dtype == dtype
    else:
        # Expected encoding when units and dtype are both left as None.
        assert encoding_units == "nanoseconds since 1970-01-01"
        assert encoded_times.dtype == np.dtype("int64")

    assert encoding_calendar == "proleptic_gregorian"

    decoded_times = decode_cf_datetime(encoded_times, encoding_units, encoding_calendar)
    np.testing.assert_equal(decoded_times, times)
| 1438 | + |
| 1439 | + |
@requires_dask
@pytest.mark.parametrize(
    ("units", "dtype"), [(None, np.dtype("int32")), ("2000-01-01", None)]
)
def test_encode_cf_datetime_via_dask_error(units, dtype) -> None:
    """Expect a ValueError when only one of units and dtype is given.

    Each parametrized case supplies exactly one of ``units`` and ``dtype``
    while the other is None, and the times are a chunked dask array.
    """
    import dask.array

    times = pd.date_range(start="1700", freq="D", periods=3)
    times = dask.array.from_array(times, chunks=1)

    # The error is expected from the encode call itself, not from a compute.
    with pytest.raises(ValueError, match="When encoding chunked arrays"):
        encode_cf_datetime(times, units, None, dtype)
| 1452 | + |
| 1453 | + |
# Parametrizations for test_encode_cf_datetime_cftime_datetime_via_dask, keyed
# by the pytest id. Each value is a (freq, units, dtype) tuple: the frequency
# used to build the times, followed by the units and dtype handed to
# encode_cf_datetime. The second case passes None for both units and dtype.
ENCODE_CFTIME_DATETIME_VIA_DASK_TESTS = {
    "prescribed-units-and-dtype": ("D", "days since 1700-01-01", np.dtype("int32")),
    "default-units-and-dtype": ("252YS", None, None),
}
| 1458 | + |
| 1459 | + |
@requires_cftime
@requires_dask
@pytest.mark.parametrize(
    "calendar",
    ["standard", "proleptic_gregorian", "julian", "noleap", "all_leap", "360_day"],
)
@pytest.mark.parametrize(
    ("freq", "units", "dtype"),
    ENCODE_CFTIME_DATETIME_VIA_DASK_TESTS.values(),
    ids=ENCODE_CFTIME_DATETIME_VIA_DASK_TESTS.keys(),
)
def test_encode_cf_datetime_cftime_datetime_via_dask(
    calendar, freq, units, dtype
) -> None:
    """Encode a chunked array of cftime dates and check the result round-trips.

    The times are three dates starting in 1700 built with ``cftime_range`` for
    the given calendar, wrapped in a dask array with one element per chunk.
    The test checks that the encoded values are still a dask array with the
    same chunks, that the returned units, dtype and calendar are as expected,
    and that decoding with ``use_cftime=True`` gives back the original times.
    """
    import dask.array

    times = cftime_range(start="1700", freq=freq, periods=3, calendar=calendar)
    times = dask.array.from_array(times, chunks=1)
    encoded_times, encoding_units, encoding_calendar = encode_cf_datetime(
        times, units, None, dtype
    )

    # Encoding must hand back a dask array chunked like the input.
    assert is_duck_dask_array(encoded_times)
    assert encoded_times.chunks == times.chunks

    if units is not None and dtype is not None:
        # Prescribed units and dtype are expected to be used as given.
        assert encoding_units == units
        assert encoded_times.dtype == dtype
    else:
        # Expected encoding when units and dtype are both left as None.
        assert encoding_units == "microseconds since 1970-01-01"
        assert encoded_times.dtype == np.dtype("int64")

    # No calendar is passed to the encoder; it should report the input's calendar.
    assert encoding_calendar == calendar

    decoded_times = decode_cf_datetime(
        encoded_times, encoding_units, encoding_calendar, use_cftime=True
    )
    np.testing.assert_equal(decoded_times, times)
| 1496 | + |
| 1497 | + |
@requires_dask
@pytest.mark.parametrize(
    "use_cftime", [False, pytest.param(True, marks=requires_cftime)]
)
def test_encode_cf_datetime_via_dask_casting_value_error(use_cftime) -> None:
    """Expect a ValueError at compute time for times that do not fit the dtype.

    The times are 12 hours apart but are encoded in units of days with an
    integer dtype, so the second value falls halfway between whole days.
    """
    import dask.array

    times = date_range(start="2000", freq="12h", periods=3, use_cftime=use_cftime)
    times = dask.array.from_array(times, chunks=1)
    units = "days since 2000-01-01"
    dtype = np.int64

    # Building the encoded array must succeed; the failure is only expected
    # once the chunks are actually computed.
    encoded_times, *_ = encode_cf_datetime(times, units, None, dtype)
    with pytest.raises(ValueError, match="Not possible"):
        encoded_times.compute()
| 1512 | + |
| 1513 | + |
@requires_dask
@pytest.mark.parametrize(
    "use_cftime", [False, pytest.param(True, marks=requires_cftime)]
)
def test_encode_cf_datetime_via_dask_casting_overflow_error(use_cftime) -> None:
    """Expect an OverflowError at compute time when values exceed the dtype.

    The times are 252 years apart and are encoded as days since the first
    time with a float16 dtype; a 252-year offset in days is larger than the
    largest finite float16 value.
    """
    import dask.array

    times = date_range(start="1700", freq="252YS", periods=3, use_cftime=use_cftime)
    times = dask.array.from_array(times, chunks=1)
    units = "days since 1700-01-01"
    dtype = np.dtype("float16")

    # Building the encoded array must succeed; the failure is only expected
    # once the chunks are actually computed.
    encoded_times, *_ = encode_cf_datetime(times, units, None, dtype)
    with pytest.raises(OverflowError, match="Not possible"):
        encoded_times.compute()
0 commit comments