Add a ``quantile`` method for groupby objects in xarray/core/groupby.py,
list it in doc/api.rst, announce it in doc/whats-new.rst, and cover it with
test_da_groupby_quantile in xarray/tests/test_groupby.py.

NOTE(review): the line structure of this patch was restored from a
whitespace-collapsed copy; the indentation of context lines is presumed and
must be checked against the target files before applying.
NOTE(review): the "By `Name `_." entries in whats-new.rst look like they have
lost their `<url>` link targets -- TODO confirm and restore.
NOTE(review): the second whats-new.rst hunk presumably only strips trailing
whitespace from the "- Replace incorrect usages ..." line -- TODO confirm.

diff --git a/doc/api.rst b/doc/api.rst
index 33c8d9d3ceb..b897fbb58a0 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -189,6 +189,7 @@ Computation
 :py:attr:`~core.groupby.DatasetGroupBy.last`
 :py:attr:`~core.groupby.DatasetGroupBy.fillna`
 :py:attr:`~core.groupby.DatasetGroupBy.where`
+:py:attr:`~core.groupby.DatasetGroupBy.quantile`
 
 Reshaping and reorganizing
 --------------------------
@@ -360,7 +361,7 @@ Computation
 :py:attr:`~core.groupby.DataArrayGroupBy.last`
 :py:attr:`~core.groupby.DataArrayGroupBy.fillna`
 :py:attr:`~core.groupby.DataArrayGroupBy.where`
-
+:py:attr:`~core.groupby.DataArrayGroupBy.quantile`
 
 Reshaping and reorganizing
 --------------------------
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 373cb8d13dc..89c9dc43570 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -21,7 +21,8 @@ v0.12.2 (unreleased)
 Enhancements
 ~~~~~~~~~~~~
 
-
+- New :py:meth:`~xarray.GroupBy.quantile` method. (:issue:`3018`)
+  By `David Huard `_.
 - Add ``keepdims`` argument for reduce operations (:issue:`2170`)
   By `Scott Wales `_.
 - netCDF chunksizes are now only dropped when original_shape is different,
@@ -85,7 +86,7 @@ Bug fixes
   By `Maximilian Roos `_.
 - Fixed performance issues with cftime installed (:issue:`3000`)
   By `0x0L `_.
-- Replace incorrect usages of `message` in pytest assertions 
+- Replace incorrect usages of `message` in pytest assertions
   with `match` (:issue:`3011`)
   By `Maximilian Roos `_.
 - Add explicit pytest markers, now required by pytest
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index d7dcb5b0426..108e85f729f 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -595,6 +595,66 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False):
         combined = self._maybe_unstack(combined)
         return combined
 
+    def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
+        """Compute the qth quantile over each array in the groups and
+        concatenate them together into a new array.
+
+        Parameters
+        ----------
+        q : float in range of [0,1] (or sequence of floats)
+            Quantile to compute, which must be between 0 and 1
+            inclusive.
+        dim : str or sequence of str, optional
+            Dimension(s) over which to apply quantile.
+            If omitted, each group is reduced over all its dimensions.
+        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+            This optional parameter specifies the interpolation method to
+            use when the desired quantile lies between two data points
+            ``i < j``:
+                * linear: ``i + (j - i) * fraction``, where ``fraction`` is
+                  the fractional part of the index surrounded by ``i`` and
+                  ``j``.
+                * lower: ``i``.
+                * higher: ``j``.
+                * nearest: ``i`` or ``j``, whichever is nearest.
+                * midpoint: ``(i + j) / 2``.
+        keep_attrs : optional
+            Forwarded unchanged to the ``quantile`` call made on each group.
+
+        Returns
+        -------
+        quantiles : same type as the grouped object
+            One value per group along the grouped dimension. If `q` is a
+            single quantile, the 'quantile' coordinate is dropped from the
+            result. If multiple quantiles are given, a 'quantile' dimension
+            is added to the return array. The other dimensions are the
+            dimensions that remain after the reduction of each group.
+
+        See Also
+        --------
+        numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile,
+        DataArray.quantile
+        """
+        if dim == DEFAULT_DIMS:
+            dim = ALL_DIMS
+            # TODO change this to dim = self._group_dim after
+            # the deprecation process
+            if self._obj.ndim > 1:
+                warnings.warn(
+                    "Default reduction dimension will be changed to the "
+                    "grouped dimension in a future version of xarray. To "
+                    "silence this warning, pass dim=xarray.ALL_DIMS "
+                    "explicitly.",
+                    FutureWarning, stacklevel=2)
+
+        out = self.apply(self._obj.__class__.quantile, shortcut=False,
+                         q=q, dim=dim, interpolation=interpolation,
+                         keep_attrs=keep_attrs)
+
+        if np.asarray(q, dtype=np.float64).ndim == 0:
+            out = out.drop('quantile')
+        return out
+
     def reduce(self, func, dim=None, axis=None, keep_attrs=None,
                shortcut=True, **kwargs):
         """Reduce the items in this group by applying `func` along some
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index b623c9bf05d..5433bd00f9d 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -105,4 +105,64 @@ def func(arg1, arg2, arg3=0):
     assert_identical(expected, actual)
 
 
+def test_da_groupby_quantile():
+
+    array = xr.DataArray([1, 2, 3, 4, 5, 6],
+                         [('x', [1, 1, 1, 2, 2, 2])])
+
+    # Scalar quantile
+    expected = xr.DataArray([2, 5], [('x', [1, 2])])
+    actual = array.groupby('x').quantile(.5)
+    assert_identical(expected, actual)
+
+    # Vector quantile
+    expected = xr.DataArray([[1, 3], [4, 6]],
+                            [('x', [1, 2]), ('quantile', [0, 1])])
+    actual = array.groupby('x').quantile([0, 1])
+    assert_identical(expected, actual)
+
+    # Multiple dimensions
+    array = xr.DataArray([[1, 11, 26], [2, 12, 22], [3, 13, 23],
+                          [4, 16, 24], [5, 15, 25]],
+                         [('x', [1, 1, 1, 2, 2],),
+                          ('y', [0, 0, 1])])
+
+    actual_x = array.groupby('x').quantile(0)
+    expected_x = xr.DataArray([1, 4],
+                              [('x', [1, 2]), ])
+    assert_identical(expected_x, actual_x)
+
+    actual_y = array.groupby('y').quantile(0)
+    expected_y = xr.DataArray([1, 22],
+                              [('y', [0, 1]), ])
+    assert_identical(expected_y, actual_y)
+
+    actual_xx = array.groupby('x').quantile(0, dim='x')
+    expected_xx = xr.DataArray([[1, 11, 22], [4, 15, 24]],
+                               [('x', [1, 2]), ('y', [0, 0, 1])])
+    assert_identical(expected_xx, actual_xx)
+
+    actual_yy = array.groupby('y').quantile(0, dim='y')
+    expected_yy = xr.DataArray([[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]],
+                               [('x', [1, 1, 1, 2, 2]), ('y', [0, 1])])
+    assert_identical(expected_yy, actual_yy)
+
+    times = pd.date_range('2000-01-01', periods=365)
+    x = [0, 1]
+    foo = xr.DataArray(np.reshape(np.arange(365 * 2), (365, 2)),
+                       coords=dict(time=times, x=x), dims=('time', 'x'))
+    g = foo.groupby(foo.time.dt.month)
+
+    actual = g.quantile(0)
+    expected = xr.DataArray([0., 62., 120., 182., 242., 304.,
+                             364., 426., 488., 548., 610., 670.],
+                            [('month', np.arange(1, 13))])
+    assert_identical(expected, actual)
+
+    actual = g.quantile(0, dim='time')[:2]
+    expected = xr.DataArray([[0., 1], [62., 63]],
+                            [('month', [1, 2]), ('x', [0, 1])])
+    assert_identical(expected, actual)
+
+
 # TODO: move other groupby tests from test_dataset and test_dataarray over here