diff --git a/enacts/calc.py b/enacts/calc.py index 7fa229422..2de88bc35 100644 --- a/enacts/calc.py +++ b/enacts/calc.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd import xarray as xr +import datetime # Date Reading functions def read_zarr_data(zarr_path): @@ -619,6 +620,103 @@ def daily_tobegroupedby_season( return daily_tobegroupedby_season +def assign_season_coords( + time_series, + start_day, + start_month, + end_day, + end_month, + time_dim="T", +): + """Assign season start and season end coordinates to this object, + mapping time dimension dates that belong to a season to their season start and end. + Data outside seasons are dropped. + + Parameters + ----------- + time_series : DataArray, Dataset + Time-dependent data to be grouped. + start_day : int + Day of the start date of the season. + start_month : int + Month of the start date of the season. + end_day : int + Day of the end date of the season. + end_month : int + Day of the end date of the season. + time_dim : str, optional + Time coordinate in `time_series` (default `time_dim` ="T"). + + Returns + ------- + time_series : DataArray, Dataset + `time_series` where days outside the seasons of interest are dropped + and with two additional coordinates `season_start` and `season_end` + that map the time dimension dates that belong to a season to + their season start and end respectively. + + See Also + -------- + xarray.DataArray.assign_coords, xarray.DataArray.groupby + + Notes + ----- + If the first (last) season is truncated, because `time_series` starts (ends) within it, + The first (last) element of `season_start` ( `season_end` ) differs from all others + and is the first (last) element of `time_dim` . + The additional coordinates are expected to be used by a groupby function. + + Examples + -------- + """ + if ( + (start_day == 29) + and (start_month == 2) + and ((~time_series[time_dim].dt.is_leap_year).sum() > 0) + ) : + raise Exception( + "if there is at least one non-leap year in time_coord, can not start on 29-Feb" + ) + # ending season on 29-Feb means ending on 1-Mar with -1 day offset + if (end_day == 29 and end_month == 2) : + end_day = 1 + end_month = 3 + day_offset = -1 + else: + day_offset = 0 + # Create start_/end_edges pairing arrays + dense_time = time_series[time_dim].resample({time_dim: "1D"}).asfreq() + start_edges = sel_day_and_month(dense_time, start_day, start_month) + end_edges = sel_day_and_month(dense_time, end_day, end_month, offset=day_offset) + if start_edges[0] > end_edges[0] : + start_edges = xr.concat([time_series[time_dim][0], start_edges], dim=time_dim) + if end_edges[-1] < start_edges[-1] : + end_edges = xr.concat([end_edges, time_series[time_dim][-1]], dim=time_dim) + # dates mapped to the season start and end + season_start = xr.DataArray(np.piecewise( + time_series[time_dim], + [((time_series[time_dim] >= start_edges[t]) & (time_series[time_dim] <= end_edges[t])).values + for t in range(start_edges.size)], + [start_edges[t] for t in range(start_edges.size)] + + [pd.to_datetime([np.nan])], + ), dims=[time_dim], coords={time_dim: time_series[time_dim]}) + season_end = xr.DataArray(np.piecewise( + time_series[time_dim], + [((time_series[time_dim] >= start_edges[t]) & (time_series[time_dim] <= end_edges[t])).values + for t in range(end_edges.size)], + [end_edges[t] for t in range(end_edges.size)] + + [pd.to_datetime([np.nan])], + ), dims=[time_dim], coords={time_dim: time_series[time_dim]}) + # Drop days out of seasons and assign coords + return (time_series.where(~np.isnat(season_start), drop=True) + .assign_coords( + season_start=(time_dim, season_start[~np.isnat(season_start)].data) + ) + .assign_coords( + season_end=(time_dim, season_end[~np.isnat(season_end)].data) + ) + ) + # Seasonal Functions def seasonal_onset_date( diff --git a/enacts/onset/tests/test_calc.py b/enacts/onset/tests/test_calc.py index 71ac18c4e..2ee16272a 100644 --- a/enacts/onset/tests/test_calc.py +++ b/enacts/onset/tests/test_calc.py @@ -311,6 +311,71 @@ def test_daily_tobegroupedby_season_picks_right_end_dates(): ).all() +def test_assign_season_coords(): + + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_set = xr.Dataset( + {"data_var": (["T"], data_var.data)}, + coords={"T": t} + ) + data_set = calc.assign_season_coords(data_set, 29, 11, 29, 2) + + assert ((data_set["T"] >= data_set["season_start"]) + & (data_set["T"] <= data_set["season_end"])).all() + + +def test_assign_season_coords_season_start_29Feb(): + + t = pd.date_range(start="2000-01-01", end="2000-06-28", freq="1D") + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_var = calc.assign_season_coords(data_var, 29, 2, 29, 3) + + assert ((data_var["T"] >= data_var["season_start"]) + & (data_var["T"] <= data_var["season_end"])).all() + + +def test_assign_season_coords_season_end_29Feb(): + + t = pd.date_range(start="2000-01-01", end="2001-03-02", freq="1D") + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_var = calc.assign_season_coords(data_var, 19, 1, 29, 2) + + assert data_var["T"][-1].dt.day == 28 + + +def test_assign_season_coords_start_366before_end(): + + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_var = calc.assign_season_coords(data_var, 19, 11, 29, 2) + + assert ( + (data_var["season_end"] - data_var["season_start"]) <= pd.Timedelta(days=366) + ).all() + + +def test_assign_season_coords_retains(): + + t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D") + data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t}) + data_set = xr.Dataset( + {"data_var": (["T"], data_var.data)}, + coords={"T": t} + ) + data_set = calc.assign_season_coords(data_set, 29, 11, 29, 2) + expected_T = ( + pd.date_range(start="2000-01-01", end="2000-02-29", freq="1D") + .union(pd.date_range(start="2000-11-29", end="2001-02-28", freq="1D")) + .union(pd.date_range(start="2001-11-29", end="2002-02-28", freq="1D")) + .union(pd.date_range(start="2002-11-29", end="2003-02-28", freq="1D")) + .union(pd.date_range(start="2003-11-29", end="2004-02-29", freq="1D")) + .union(pd.date_range(start="2004-11-29", end="2005-02-28", freq="1D")) + ) + + assert (data_set["T"] == expected_T).all() + + def test_seasonal_onset_date_keeps_returning_same_outputs(): precip = data_test_calc.multi_year_data_sample()