Skip to content

Add quantile method to GroupBy #2828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 24, 2019
3 changes: 2 additions & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ Computation
:py:attr:`~core.groupby.DatasetGroupBy.last`
:py:attr:`~core.groupby.DatasetGroupBy.fillna`
:py:attr:`~core.groupby.DatasetGroupBy.where`
:py:attr:`~core.groupby.DatasetGroupBy.quantile`

Reshaping and reorganizing
--------------------------
Expand Down Expand Up @@ -360,7 +361,7 @@ Computation
:py:attr:`~core.groupby.DataArrayGroupBy.last`
:py:attr:`~core.groupby.DataArrayGroupBy.fillna`
:py:attr:`~core.groupby.DataArrayGroupBy.where`

:py:attr:`~core.groupby.DataArrayGroupBy.quantile`

Reshaping and reorganizing
--------------------------
Expand Down
5 changes: 3 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ v0.12.2 (unreleased)
Enhancements
~~~~~~~~~~~~


- New :py:meth:`~xarray.GroupBy.quantile` method. (:issue:`3018`)
By `David Huard <https://github.com/huard>`_.
- Add ``keepdims`` argument for reduce operations (:issue:`2170`)
By `Scott Wales <https://github.com/ScottWales>`_.
- netCDF chunksizes are now only dropped when original_shape is different,
Expand Down Expand Up @@ -85,7 +86,7 @@ Bug fixes
By `Maximilian Roos <https://github.com/max-sixty>`_.
- Fixed performance issues with cftime installed (:issue:`3000`)
By `0x0L <https://github.com/0x0L>`_.
- Replace incorrect usages of `message` in pytest assertions
- Replace incorrect usages of `message` in pytest assertions
with `match` (:issue:`3011`)
By `Maximilian Roos <https://github.com/max-sixty>`_.
- Add explicit pytest markers, now required by pytest
Expand Down
58 changes: 58 additions & 0 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,64 @@ def _combine(self, applied, restore_coord_dims=False, shortcut=False):
combined = self._maybe_unstack(combined)
return combined

def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
    """Apply ``quantile`` to every group and combine the per-group results.

    Each group is reduced with the ``quantile`` method of the grouped
    object's class, and the pieces are assembled through ``self.apply``
    (with ``shortcut=False``).

    Parameters
    ----------
    q : float in range of [0,1] (or sequence of floats)
        Quantile(s) to compute; each must lie between 0 and 1 inclusive.
    dim : str or sequence of str, optional
        Dimension(s) over which to apply quantile. The value is forwarded
        to the per-group ``quantile`` call as given; only the
        ``DEFAULT_DIMS`` sentinel is replaced (by ``ALL_DIMS``) first.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        Interpolation method used when the desired quantile lies between
        two data points ``i < j``:

            * linear: ``i + (j - i) * fraction``, where ``fraction`` is
              the fractional part of the index surrounded by ``i`` and
              ``j``.
            * lower: ``i``.
            * higher: ``j``.
            * nearest: ``i`` or ``j``, whichever is nearest.
            * midpoint: ``(i + j) / 2``.
    keep_attrs : optional
        Forwarded unchanged to the per-group ``quantile`` call.

    Returns
    -------
    quantiles : object returned by ``self.apply``
        If `q` is a single quantile, ``'quantile'`` is dropped from the
        combined result. If a sequence of quantiles is given, the
        combined result is returned as is, keeping the ``'quantile'``
        labelling produced by the per-group calls.

    See Also
    --------
    numpy.nanpercentile, pandas.Series.quantile, Dataset.quantile,
    DataArray.quantile
    """
    if dim == DEFAULT_DIMS:
        # TODO change this to dim = self._group_dim after
        # the deprecation process
        dim = ALL_DIMS
        if self._obj.ndim > 1:
            message = (
                "Default reduction dimension will be changed to the "
                "grouped dimension in a future version of xarray. To "
                "silence this warning, pass dim=xarray.ALL_DIMS "
                "explicitly.")
            warnings.warn(message, FutureWarning, stacklevel=2)

    # Use the unbound ``quantile`` of the grouped object's own class so the
    # same code path serves every kind of grouped object.
    per_group_quantile = self._obj.__class__.quantile
    forwarded = dict(q=q, dim=dim, interpolation=interpolation,
                     keep_attrs=keep_attrs)
    combined = self.apply(per_group_quantile, shortcut=False, **forwarded)

    # A scalar ``q`` converts to a zero-dimensional array; only then is
    # 'quantile' dropped from the result.
    q_is_scalar = np.asarray(q, dtype=np.float64).ndim == 0
    if not q_is_scalar:
        return combined
    return combined.drop('quantile')

def reduce(self, func, dim=None, axis=None, keep_attrs=None,
shortcut=True, **kwargs):
"""Reduce the items in this group by applying `func` along some
Expand Down
60 changes: 60 additions & 0 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,64 @@ def func(arg1, arg2, arg3=0):
assert_identical(expected, actual)


def test_da_groupby_quantile():
    """Check ``groupby(...).quantile`` on DataArrays.

    Covers a scalar and a vector ``q`` on one-dimensional data, the
    default and explicit ``dim`` on two-dimensional data, and grouping
    by calendar month on a daily time axis.
    """
    # --- one-dimensional data, three values per group -------------------
    flat = xr.DataArray([1, 2, 3, 4, 5, 6],
                        coords=[('x', [1, 1, 1, 2, 2, 2])])

    # Scalar quantile: the median of each group, no 'quantile' coordinate.
    median_expected = xr.DataArray([2, 5], coords=[('x', [1, 2])])
    median_actual = flat.groupby('x').quantile(.5)
    assert_identical(median_expected, median_actual)

    # Vector quantile: q=0 and q=1 give each group's min and max, laid
    # out along a trailing 'quantile' dimension.
    extremes_expected = xr.DataArray(
        [[1, 3], [4, 6]],
        coords=[('x', [1, 2]), ('quantile', [0, 1])])
    extremes_actual = flat.groupby('x').quantile([0, 1])
    assert_identical(extremes_expected, extremes_actual)

    # --- two-dimensional data with repeated labels on both axes ---------
    grid = xr.DataArray(
        [[1, 11, 26],
         [2, 12, 22],
         [3, 13, 23],
         [4, 16, 24],
         [5, 15, 25]],
        coords=[('x', [1, 1, 1, 2, 2]), ('y', [0, 0, 1])])

    # Without ``dim`` each group collapses to a single value.
    by_x_actual = grid.groupby('x').quantile(0)
    by_x_expected = xr.DataArray([1, 4], coords=[('x', [1, 2])])
    assert_identical(by_x_expected, by_x_actual)

    by_y_actual = grid.groupby('y').quantile(0)
    by_y_expected = xr.DataArray([1, 22], coords=[('y', [0, 1])])
    assert_identical(by_y_expected, by_y_actual)

    # With ``dim`` equal to the grouped dimension the other axis survives.
    along_x_actual = grid.groupby('x').quantile(0, dim='x')
    along_x_expected = xr.DataArray(
        [[1, 11, 22], [4, 15, 24]],
        coords=[('x', [1, 2]), ('y', [0, 0, 1])])
    assert_identical(along_x_expected, along_x_actual)

    along_y_actual = grid.groupby('y').quantile(0, dim='y')
    along_y_expected = xr.DataArray(
        [[1, 26], [2, 22], [3, 23], [4, 24], [5, 25]],
        coords=[('x', [1, 1, 1, 2, 2]), ('y', [0, 1])])
    assert_identical(along_y_expected, along_y_actual)

    # --- grouping a daily series by calendar month ----------------------
    days = pd.date_range('2000-01-01', periods=365)
    columns = [0, 1]
    daily = xr.DataArray(np.arange(365 * 2).reshape(365, 2),
                         coords={'time': days, 'x': columns},
                         dims=('time', 'x'))
    monthly = daily.groupby(daily.time.dt.month)

    # q=0 over everything in a month: the first value of that month.
    month_min_actual = monthly.quantile(0)
    month_min_expected = xr.DataArray(
        [0., 62., 120., 182., 242., 304.,
         364., 426., 488., 548., 610., 670.],
        coords=[('month', np.arange(1, 13))])
    assert_identical(month_min_expected, month_min_actual)

    # Reducing over 'time' only keeps 'x'; compare the first two months.
    first_two_actual = monthly.quantile(0, dim='time')[:2]
    first_two_expected = xr.DataArray(
        [[0., 1], [62., 63]],
        coords=[('month', [1, 2]), ('x', [0, 1])])
    assert_identical(first_two_expected, first_two_actual)


# TODO: move other groupby tests from test_dataset and test_dataarray over here