
ideas for daily time grid crunching #372


Draft: wants to merge 12 commits into base: master
98 changes: 98 additions & 0 deletions enacts/calc.py
@@ -1,6 +1,7 @@
import numpy as np
import pandas as pd
import xarray as xr
import datetime

# Date Reading functions
def read_zarr_data(zarr_path):
@@ -619,6 +620,103 @@ def daily_tobegroupedby_season(
    return daily_tobegroupedby_season


def assign_season_coords(
    time_series,
    start_day,
    start_month,
    end_day,
    end_month,
    time_dim="T",
):
"""Assign season start and season end coordinates to this object,
mapping time dimension dates that belong to a season to their season start and end.
Data outside seasons are dropped.

Parameters
-----------
time_series : DataArray, Dataset
Time-dependent data to be grouped.
start_day : int
Day of the start date of the season.
start_month : int
Month of the start date of the season.
end_day : int
Day of the end date of the season.
end_month : int
Day of the end date of the season.
time_dim : str, optional
Time coordinate in `time_series` (default `time_dim` ="T").

Returns
-------
time_series : DataArray, Dataset
`time_series` where days outside the seasons of interest are dropped
and with two additional coordinates `season_start` and `season_end`
that map the time dimension dates that belong to a season to
their season start and end respectively.

See Also
--------
xarray.DataArray.assign_coords, xarray.DataArray.groupby

Notes
-----
If the first (last) season is truncated, because `time_series` starts (ends) within it,
The first (last) element of `season_start` ( `season_end` ) differs from all others
and is the first (last) element of `time_dim` .
The additional coordinates are expected to be used by a groupby function.

Examples
--------
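    Illustrative sketch (assuming daily data on a "T" dimension):

    >>> t = pd.date_range(start="2000-01-01", end="2002-03-31", freq="1D")
    >>> da = xr.DataArray(range(t.size), dims=["T"], coords={"T": t})
    >>> da = assign_season_coords(da, 1, 12, 28, 2)

    Each date kept in `da` now carries the start and end of its Dec-Feb
    season; dates outside those seasons (e.g. 2000-03-01 through
    2000-11-30) are dropped.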
"""
    if (
        (start_day == 29)
        and (start_month == 2)
        and ((~time_series[time_dim].dt.is_leap_year).sum() > 0)
    ):
        raise Exception(
            "if there is at least one non-leap year in `time_series`, "
            "the season can not start on 29-Feb"
        )
    # ending the season on 29-Feb means ending on 1-Mar with a -1 day offset
    if end_day == 29 and end_month == 2:
        end_day = 1
        end_month = 3
        day_offset = -1
    else:
        day_offset = 0
Collaborator:
Maybe end_month/end_day should always represent the first day that's not included in the season? I.e. day_offset is always -1; then the DJF season would be represented with end_month = 3, end_day = 1, and JFM with end_month = 4, end_day = 1. Advantages: no special case --> less likelihood of a bug in a special case that we don't notice because we failed to test it; and it corresponds to e.g. how arrays are sliced in Python: '012345'[0:4] == '0123', not '01234'. Disadvantage: for some reason xarray slicing, unlike Python slicing, uses intervals that are closed on both ends, so that could be confusing.
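For reference, a minimal runnable sketch (not from this PR) contrasting the two slicing conventions:

import numpy as np
import xarray as xr

# Python slices are half-open: the right edge is excluded.
assert "012345"[0:4] == "0123"

# xarray label-based slices are closed on both ends: the right edge is included.
da = xr.DataArray(np.arange(6), dims=["x"], coords={"x": np.arange(6)})
assert (da.sel(x=slice(0, 4)).values == np.arange(5)).all()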

Collaborator:

It's late in the review cycle for me to be introducing something like this. Feel free to ignore if you don't like the suggestion.

Contributor (author):

I can see arguments for (or against) either way.

I am not sure there won't be any special case. If the user wants a season ending on 28 Feb (included), would they put 29 Feb or 1 Mar? If they put 29 Feb, they would miss 28 Feb in 3 years out of 4; if they put 1 Mar, they would include 29 Feb every 4 years. At least if using sel_day_and_month...

If we envision the function also working for the monthly case, then start_day/end_day should be optional inputs, and having them default to 1 would be straightforward with an open interval end. If we keep the intervals closed on both ends, we should probably default them to None, have the start day be 1, and have the end day be 1 too with an offset of -1, so that when users don't enter days, they get full months. So it's more gymnastics (but not crazy) in that case.

Then, as we try to build on top of xarray (well, at least that is what I am trying to do), I feel we should try and adopt xarray conventions, and thus keep closed intervals... That's not a very strong argument.
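For concreteness, a hypothetical sketch of the optional-day variant being discussed; the name, defaults, and half-open convention here are illustrative assumptions, not code from this PR:

# Hypothetical: with half-open [start, end) seasons, day defaults of 1
# make omitted days fall back to full months.
def assign_season_coords_halfopen(
    time_series, start_month, end_month, start_day=1, end_day=1, time_dim="T"
):
    # e.g. start_month=12, end_month=3 selects exactly Dec-Jan-Feb,
    # in leap and non-leap years alike, because 1-Mar is always
    # the first excluded day.
    ...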

Collaborator:

> If they put 29 Feb, they would miss 28 Feb in 3 years out of 4

I don't understand. Why would they miss Feb 28?

Xarray's choice to define slices as closed on both ends doesn't make sense to me. You can't partition a range of real values into bins using intervals that are closed on both ends. Indeed, groupby_bins defaults to intervals that are open on the left and closed on the right, which is doubly weird, because it's consistent with neither the behavior of .sel nor the more general convention, which (as I see it) is for intervals to be closed on the left and open on the right.
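A small runnable illustration of that groupby_bins default (for reference, not part of the diff):

import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(6), dims=["x"], coords={"x": np.arange(6)})
groups = da.groupby_bins("x", bins=[0, 2, 4]).groups
# Bins default to left-open, right-closed intervals:
# {Interval(0, 2, closed='right'): [1, 2], Interval(2, 4, closed='right'): [3, 4]}
# so x == 0 falls into no bin at all.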

Contributor (author):

They would miss it if we keep on using sel_day_and_month.

Yes, I noticed the seemingly strange behavior of groupby_bins with respect to interval edge inclusivity. It just reproduces the pandas.cut behavior, though. And maybe it is so because this is more about categorizing than slicing? Maybe we should look at this not through the groupby prism but through the resample one?
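For reference, pandas.cut shows the same default behavior (illustrative, not part of the diff):

import pandas as pd

# pandas.cut, which groupby_bins builds on, is left-open/right-closed by default:
binned = pd.cut([0, 1, 2], bins=[0, 1, 2])
# -> [NaN, (0, 1], (1, 2]]: the left edge 0 lands in no bin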

    # Create start_/end_edges pairing arrays
    dense_time = time_series[time_dim].resample({time_dim: "1D"}).asfreq()
    start_edges = sel_day_and_month(dense_time, start_day, start_month)
    end_edges = sel_day_and_month(dense_time, end_day, end_month, offset=day_offset)
    if start_edges[0] > end_edges[0]:
        start_edges = xr.concat([time_series[time_dim][0], start_edges], dim=time_dim)
    if end_edges[-1] < start_edges[-1]:
        end_edges = xr.concat([end_edges, time_series[time_dim][-1]], dim=time_dim)
    # dates mapped to the season start and end
    season_start = xr.DataArray(np.piecewise(
        time_series[time_dim],
        [((time_series[time_dim] >= start_edges[t]) & (time_series[time_dim] <= end_edges[t])).values
         for t in range(start_edges.size)],
        [start_edges[t] for t in range(start_edges.size)]
        + [pd.to_datetime([np.nan])],
    ), dims=[time_dim], coords={time_dim: time_series[time_dim]})
    season_end = xr.DataArray(np.piecewise(
        time_series[time_dim],
        [((time_series[time_dim] >= start_edges[t]) & (time_series[time_dim] <= end_edges[t])).values
         for t in range(end_edges.size)],
        [end_edges[t] for t in range(end_edges.size)]
        + [pd.to_datetime([np.nan])],
    ), dims=[time_dim], coords={time_dim: time_series[time_dim]})
    # Drop days out of seasons and assign coords
    return (time_series.where(~np.isnat(season_start), drop=True)
            .assign_coords(
                season_start=(time_dim, season_start[~np.isnat(season_start)].data)
            )
            .assign_coords(
                season_end=(time_dim, season_end[~np.isnat(season_end)].data)
            )
    )

# Seasonal Functions

def seasonal_onset_date(
65 changes: 65 additions & 0 deletions enacts/onset/tests/test_calc.py
@@ -311,6 +311,71 @@ def test_daily_tobegroupedby_season_picks_right_end_dates():
    ).all()


def test_assign_season_coords():

    t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D")
    data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t})
    data_set = xr.Dataset(
        {"data_var": (["T"], data_var.data)},
        coords={"T": t}
    )
    data_set = calc.assign_season_coords(data_set, 29, 11, 29, 2)

    assert ((data_set["T"] >= data_set["season_start"])
            & (data_set["T"] <= data_set["season_end"])).all()


def test_assign_season_coords_season_start_29Feb():

    t = pd.date_range(start="2000-01-01", end="2000-06-28", freq="1D")
    data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t})
    data_var = calc.assign_season_coords(data_var, 29, 2, 29, 3)

    assert ((data_var["T"] >= data_var["season_start"])
            & (data_var["T"] <= data_var["season_end"])).all()


def test_assign_season_coords_season_end_29Feb():

    t = pd.date_range(start="2000-01-01", end="2001-03-02", freq="1D")
    data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t})
    data_var = calc.assign_season_coords(data_var, 19, 1, 29, 2)

    assert data_var["T"][-1].dt.day == 28


def test_assign_season_coords_start_366before_end():

    t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D")
    data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t})
    data_var = calc.assign_season_coords(data_var, 19, 11, 29, 2)

    assert (
        (data_var["season_end"] - data_var["season_start"]) <= pd.Timedelta(days=366)
    ).all()


def test_assign_season_coords_retains():

    t = pd.date_range(start="2000-01-01", end="2005-02-28", freq="1D")
    data_var = xr.DataArray(range(t.size), dims=["T"], coords={"T": t})
    data_set = xr.Dataset(
        {"data_var": (["T"], data_var.data)},
        coords={"T": t}
    )
    data_set = calc.assign_season_coords(data_set, 29, 11, 29, 2)
    expected_T = (
        pd.date_range(start="2000-01-01", end="2000-02-29", freq="1D")
        .union(pd.date_range(start="2000-11-29", end="2001-02-28", freq="1D"))
        .union(pd.date_range(start="2001-11-29", end="2002-02-28", freq="1D"))
        .union(pd.date_range(start="2002-11-29", end="2003-02-28", freq="1D"))
        .union(pd.date_range(start="2003-11-29", end="2004-02-29", freq="1D"))
        .union(pd.date_range(start="2004-11-29", end="2005-02-28", freq="1D"))
    )

    assert (data_set["T"] == expected_T).all()


def test_seasonal_onset_date_keeps_returning_same_outputs():

    precip = data_test_calc.multi_year_data_sample()