Skip to content

ddof vs correction kwargs in std/var #8573

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion xarray/core/_aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3280,7 +3280,7 @@ def var(
dim: Dims = None,
*,
skipna: bool | None = None,
ddof: int = 0,
ddof: int | None = None,
keep_attrs: bool | None = None,
**kwargs: Any,
) -> Self:
Expand Down
42 changes: 41 additions & 1 deletion xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from collections.abc import Callable
from functools import partial
from importlib import import_module
from typing import Any

import numpy as np
import pandas as pd
Expand All @@ -29,6 +30,7 @@
from xarray.core import dask_array_compat, dask_array_ops, dtypes, nputils
from xarray.core.array_api_compat import get_array_namespace
from xarray.core.options import OPTIONS
from xarray.core.types import T_DuckArray
from xarray.core.utils import is_duck_array, is_duck_dask_array, module_available
from xarray.namedarray.parallelcompat import get_chunked_array_type
from xarray.namedarray.pycompat import array_type, is_chunked_array
Expand All @@ -43,7 +45,6 @@
normalize_axis_index,
)


dask_available = module_available("dask")


Expand Down Expand Up @@ -231,9 +232,9 @@
xp = get_array_namespace(data)
if xp == np:
# numpy currently doesn't have a astype:
return data.astype(dtype, **kwargs)

Check warning on line 235 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

invalid value encountered in cast

Check warning on line 235 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

invalid value encountered in cast

Check warning on line 235 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

invalid value encountered in cast

Check warning on line 235 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

invalid value encountered in cast

Check warning on line 235 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

invalid value encountered in cast

Check warning on line 235 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

invalid value encountered in cast

Check warning on line 235 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

invalid value encountered in cast

Check warning on line 235 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / windows-latest py3.10

invalid value encountered in cast
return xp.astype(data, dtype, **kwargs)
return data.astype(dtype, **kwargs)

Check warning on line 237 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-minimum

invalid value encountered in cast

Check warning on line 237 in xarray/core/duck_array_ops.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 bare-minimum

invalid value encountered in cast


def asarray(data, xp=np, dtype=None):
Expand Down Expand Up @@ -474,6 +475,9 @@
if coerce_strings and dtypes.is_string(values.dtype):
values = astype(values, object)

if name in ["std", "var"]:
kwargs = _handle_ddof_vs_correction_kwargs(kwargs, values)

func = None
if skipna or (
skipna is None
Expand Down Expand Up @@ -512,6 +516,42 @@
return f


def _handle_ddof_vs_correction_kwargs(
kwargs: dict[str, Any], values: T_DuckArray
) -> dict[str, Any]:
# handle ddof vs correction kwargs, see GH8566

ddof = kwargs.pop("ddof", None)
correction = kwargs.pop("correction", None)

if ddof is None and correction is None:
degrees_of_freedom_correction_val = 0 # default to 0, see GH8566
elif ddof is not None and correction is not None:
if ddof != correction:
raise ValueError(
"ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. "
f"Got ddof={ddof} but correction={correction}"
)
else:
# both kwargs were passed, but they are the same value
warnings.warn(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be using emit_user_level_warning?

"ddof and correction both refer to the same thing - you don't need to pass them both"
)
Comment on lines +537 to +539
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
warnings.warn(
"ddof and correction both refer to the same thing - you don't need to pass them both"
)
emit_user_level_warning(
"ddof and correction both refer to the same thing - you don't need to pass them both"
)

degrees_of_freedom_correction_val = ddof
else:
# only one of the two kwargs passed
degrees_of_freedom_correction_val = ddof if ddof is not None else correction

# assume that only array-API-compliant libraries will implement correction over ddof
# reasonable since array-API people made this name change, see https://github.com/data-apis/array-api/issues/695
if hasattr(values, "__array_namespace__"):
kwargs["correction"] = degrees_of_freedom_correction_val
else:
kwargs["ddof"] = degrees_of_freedom_correction_val

return kwargs


# Attributes `numeric_only`, `available_min_count` is used for docs.
# See ops.inject_reduce_methods
argmax = _create_nan_agg_method("argmax", coerce_strings=True)
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0):
return nputils.nanvar(a, axis=axis, dtype=dtype, ddof=ddof)


def nanstd(a, axis=None, dtype=None, out=None, ddof=0):
def nanstd(a, axis=None, dtype=None, out=None, ddof=None):
return nputils.nanstd(a, axis=axis, dtype=dtype, ddof=ddof)


Expand Down
2 changes: 1 addition & 1 deletion xarray/namedarray/_aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ def std(
dim: Dims = None,
*,
skipna: bool | None = None,
ddof: int = 0,
ddof: int | None = None,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know that I'm supposed to regenerate these files instead of changing them directly, but I don't understand why when I change this directly ddof=0 still gets passed down explicitly.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't reproduce that. What code do you use to see that behavior? However, if you tried Dataset.std, then the reason is that you didn't update the defaults on DatasetAggregations, yet.

**kwargs: Any,
) -> Self:
"""
Expand Down
42 changes: 42 additions & 0 deletions xarray/tests/test_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,45 @@
actual = xr.where(xp_arr, 1, 0)
assert isinstance(actual.data, Array)
assert_equal(actual, expected)


def test_statistics(arrays) -> None:
with xr.set_options(use_bottleneck=False):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Haven't tried to make this work with bottleneck / numbagg yet, so turning off bottleneck temporarily for simplicity.

np_arr_nans, xp_arr_nans = arrays

# TODO this is not really a test of the array API compatibility specifically, just of the ddof vs correction kwarg handling
expected = np_arr_nans.std(ddof=1)

Check failure on line 154 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10

test_statistics TypeError: nanstd() got an unexpected keyword argument 'correction'

Check failure on line 154 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.12

test_statistics TypeError: nanstd() got an unexpected keyword argument 'correction'

Check failure on line 154 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.10

test_statistics TypeError: nanstd() got an unexpected keyword argument 'correction'

Check failure on line 154 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.12

test_statistics TypeError: nanstd() got an unexpected keyword argument 'correction'

Check failure on line 154 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.12 all-but-numba

test_statistics TypeError: nanstd() got an unexpected keyword argument 'correction'

Check failure on line 154 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.11 all-but-dask

test_statistics TypeError: nanstd() got an unexpected keyword argument 'correction'
actual = np_arr_nans.std(correction=1)

Check failure on line 155 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 min-all-deps

test_statistics ValueError: ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. Got ddof=0 but correction=1
assert_equal(actual, expected)

# test the default value
expected = np_arr_nans.std()
actual = np_arr_nans.std(correction=0)
assert_equal(actual, expected)

# TODO have to remove the NaNs in the example data because array API standard can't handle them yet, see https://github.com/pydata/xarray/pull/7424#issuecomment-1373979208
np_arr, xp_arr = np_arr_nans.fillna(0), xp_arr_nans.fillna(0)

expected = np_arr.std()
actual = xp_arr.std(skipna=False)
assert isinstance(actual.data, Array)
assert_equal(actual, expected)

expected = np_arr.std(ddof=1)
actual = xp_arr.std(skipna=False, ddof=1)
assert isinstance(actual.data, Array)
assert_equal(actual, expected)

expected = np_arr.std(ddof=1)
actual = xp_arr.std(skipna=False, correction=1)
assert isinstance(actual.data, Array)
assert_equal(actual, expected)

# TODO check this warns
expected = np_arr.std(ddof=1)
actual = xp_arr.std(skipna=False, ddof=1, correction=1)
Copy link
Collaborator

@keewis keewis Dec 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
actual = xp_arr.std(skipna=False, ddof=1, correction=1)
with pytest.warns(UserWarning, match="ddof and correction both refer to the same thing"):
actual = xp_arr.std(skipna=False, ddof=1, correction=1)

assert isinstance(actual.data, Array)
assert_equal(actual, expected)

with pytest.raises(ValueError):
xp_arr.std(skipna=False, ddof=1, correction=0)
Loading