From c21bd5dcfc232cd54fab267fa44c544ea0ff1446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Sta=C5=84czak?= Date: Tue, 26 Apr 2022 15:09:07 +0200 Subject: [PATCH 01/11] Add allow_failures flag to Dataset.curve_fit --- xarray/core/dataset.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 81860bede95..b1b3854ff91 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -8631,6 +8631,7 @@ def curvefit( p0: dict[str, float | DataArray] | None = None, bounds: dict[str, tuple[float | DataArray, float | DataArray]] | None = None, param_names: Sequence[str] | None = None, + allow_failures: bool = False, kwargs: dict[str, Any] | None = None, ) -> T_Dataset: """ @@ -8675,6 +8676,12 @@ def curvefit( this will be automatically determined by arguments of `func`. `param_names` should be manually supplied when fitting a function that takes a variable number of parameters. + allow_failures: bool, optional + If optionally set, then if the underlying `scipy.optimize_curve_fit` + optimization fails for any of the fits, return nan in coefficients + and covariances for those cases. + Helpful when fitting multiple curves and some of the data + just doesn't fit your model. Default is false. **kwargs : optional Additional keyword arguments to passed to scipy curve_fit. @@ -8793,7 +8800,15 @@ def _wrapper(Y, *args, **kwargs): pcov = np.full([n_params, n_params], np.nan) return popt, pcov x = np.squeeze(x) - popt, pcov = curve_fit(func, x, y, p0=p0_, bounds=(lb, ub), **kwargs) + + try: + popt, pcov = curve_fit(func, x, y, p0=p0_, bounds=(lb, ub), **kwargs) + except RuntimeError: + if not allow_failures: + raise + popt = np.full([n_params], np.nan) + pcov = np.full([n_params, n_params], np.nan) + return popt, pcov result = type(self)() From f4421a81e93426e328939a09910c20166597afb5 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 12:19:39 +0300 Subject: [PATCH 02/11] Reword docstring --- xarray/core/dataset.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b1b3854ff91..16ff3860d79 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -8676,12 +8676,11 @@ def curvefit( this will be automatically determined by arguments of `func`. `param_names` should be manually supplied when fitting a function that takes a variable number of parameters. - allow_failures: bool, optional - If optionally set, then if the underlying `scipy.optimize_curve_fit` - optimization fails for any of the fits, return nan in coefficients - and covariances for those cases. - Helpful when fitting multiple curves and some of the data - just doesn't fit your model. Default is false. + allow_failures: bool, default: False + If True and the underlying `scipy.optimize_curve_fit` optimization fails for + any of the fits, return NaN in coefficients and covariances for those + coordinates. Helpful when fitting multiple curves and some of the data just + doesn't fit your model. **kwargs : optional Additional keyword arguments to passed to scipy curve_fit. From 7250833c8622369071aa695ea5445271df71a518 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 12:21:14 +0300 Subject: [PATCH 03/11] Add allow_failures flag also to DataArray --- xarray/core/dataarray.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ea6c1684f0d..09aec60e2c9 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6162,6 +6162,7 @@ def curvefit( p0: dict[str, float | DataArray] | None = None, bounds: dict[str, tuple[float | DataArray, float | DataArray]] | None = None, param_names: Sequence[str] | None = None, + allow_failures: bool = False, kwargs: dict[str, Any] | None = None, ) -> Dataset: """ @@ -6206,6 +6207,11 @@ def curvefit( this will be automatically determined by arguments of `func`. `param_names` should be manually supplied when fitting a function that takes a variable number of parameters. + allow_failures: bool, default: False + If True and the underlying `scipy.optimize_curve_fit` optimization fails for + any of the fits, return NaN in coefficients and covariances for those + coordinates. Helpful when fitting multiple curves and some of the data just + doesn't fit your model. **kwargs : optional Additional keyword arguments to passed to scipy curve_fit. @@ -6310,6 +6316,7 @@ def curvefit( p0=p0, bounds=bounds, param_names=param_names, + allow_failures=allow_failures, kwargs=kwargs, ) From b15ae880ea2b085f7a1f7fb9447ebf427fba177e Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 12:27:10 +0300 Subject: [PATCH 04/11] Add unit test for curvefit with allow_failures --- xarray/tests/test_dataarray.py | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index e3138a32716..c183a70fc24 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4571,6 +4571,48 @@ def sine(t, a, f, p): bounds={"a": (0, DataArray([1], coords={"foo": [1]}))}, ) + @requires_scipy + @pytest.mark.parametrize("use_dask", [True, False]) + def test_curvefit_allow_failures(self, use_dask: bool) -> None: + if use_dask and not has_dask: + pytest.skip("requires dask") + + # nonsense function to make the optimization fail + def line(x, a, b): + if a > 10: + return 0 + return a * x + b + + da = DataArray( + [[1, 3, 5], [0, 20, 40]], + coords={"i": [1, 2], "x": [0.0, 1.0, 2.0]}, + ) + + if use_dask: + da = da.chunk({"i": 1}) + + expected = DataArray( + [[2, 1], [np.nan, np.nan]], coords={"i": [1, 2], "param": ["a", "b"]} + ) + + with pytest.raises(RuntimeError): + da.curvefit( + coords="x", + func=line, + # limit maximum number of calls so the optimization fails + kwargs=dict(maxfev=5), + ) + + fit = da.curvefit( + coords="x", + func=line, + allow_failures=True, + # limit maximum number of calls so the optimization fails + kwargs=dict(maxfev=5), + ) + + assert_allclose(fit.curvefit_coefficients, expected) + class TestReduce: @pytest.fixture(autouse=True) From c25ecaaa4c4ba28a2586aee90ce35467c0a6b50c Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 12:34:59 +0300 Subject: [PATCH 05/11] Update whats-new MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dominik Stańczak --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b03388cb551..95a0d73ca4b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,6 +25,10 @@ New Features - Added support for multidimensional initial guess and bounds in :py:meth:`DataArray.curvefit` (:issue:`7768`, :pull:`7821`). By `András Gunyhó `_. +- Add a ``allow_failures`` flag to :py:meth:`Dataset.curve_fit` that allows + returning NaN for the parameters and covariances of failed fits, rather than + failing the whole series of fits (:issue:`6317`). + By `Dominik Stańczak `_ and `András Gunyhó `_. Breaking changes ~~~~~~~~~~~~~~~~ From 8324f7b6adabdb12c239a05e176bb168c28f66a2 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 12:46:48 +0300 Subject: [PATCH 06/11] Add PR to whats-new --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 95a0d73ca4b..cc447c11cb6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -27,7 +27,7 @@ New Features By `András Gunyhó `_. - Add a ``allow_failures`` flag to :py:meth:`Dataset.curve_fit` that allows returning NaN for the parameters and covariances of failed fits, rather than - failing the whole series of fits (:issue:`6317`). + failing the whole series of fits (:issue:`6317`, :pull:`7891`). By `Dominik Stańczak `_ and `András Gunyhó `_. Breaking changes From d3decbbdf7731c51eb0a10ac413e89048a117fb0 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 13:00:00 +0300 Subject: [PATCH 07/11] Update docstring --- xarray/core/dataarray.py | 3 +-- xarray/core/dataset.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 09aec60e2c9..8b8bd9fdbd6 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6210,8 +6210,7 @@ def curvefit( allow_failures: bool, default: False If True and the underlying `scipy.optimize_curve_fit` optimization fails for any of the fits, return NaN in coefficients and covariances for those - coordinates. Helpful when fitting multiple curves and some of the data just - doesn't fit your model. + coordinates. **kwargs : optional Additional keyword arguments to passed to scipy curve_fit. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 16ff3860d79..846000219a7 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -8679,8 +8679,7 @@ def curvefit( allow_failures: bool, default: False If True and the underlying `scipy.optimize_curve_fit` optimization fails for any of the fits, return NaN in coefficients and covariances for those - coordinates. Helpful when fitting multiple curves and some of the data just - doesn't fit your model. + coordinates. **kwargs : optional Additional keyword arguments to passed to scipy curve_fit. From efce0b328c9766456db33c9ec3d6b5ea7be0a7dd Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 20:51:53 +0300 Subject: [PATCH 08/11] Rename allow_failures to errors to be consistent with other methods --- xarray/core/dataarray.py | 12 ++++++------ xarray/core/dataset.py | 15 +++++++++------ xarray/tests/test_dataarray.py | 4 ++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8b8bd9fdbd6..15f6ab95d6e 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -6162,7 +6162,7 @@ def curvefit( p0: dict[str, float | DataArray] | None = None, bounds: dict[str, tuple[float | DataArray, float | DataArray]] | None = None, param_names: Sequence[str] | None = None, - allow_failures: bool = False, + errors: ErrorOptions = "raise", kwargs: dict[str, Any] | None = None, ) -> Dataset: """ @@ -6207,10 +6207,10 @@ def curvefit( this will be automatically determined by arguments of `func`. `param_names` should be manually supplied when fitting a function that takes a variable number of parameters. - allow_failures: bool, default: False - If True and the underlying `scipy.optimize_curve_fit` optimization fails for - any of the fits, return NaN in coefficients and covariances for those - coordinates. + errors : {"raise", "ignore"}, default: "raise" + If 'raise', any errors from the `scipy.optimize_curve_fit` optimization will + raise an exception. If 'ignore', the coefficients and covariances for the + coordinates where the fitting failed will be NaN. **kwargs : optional Additional keyword arguments to passed to scipy curve_fit. @@ -6315,7 +6315,7 @@ def curvefit( p0=p0, bounds=bounds, param_names=param_names, - allow_failures=allow_failures, + errors=errors, kwargs=kwargs, ) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 846000219a7..c4091a0a819 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -8631,7 +8631,7 @@ def curvefit( p0: dict[str, float | DataArray] | None = None, bounds: dict[str, tuple[float | DataArray, float | DataArray]] | None = None, param_names: Sequence[str] | None = None, - allow_failures: bool = False, + errors: ErrorOptions = "raise", kwargs: dict[str, Any] | None = None, ) -> T_Dataset: """ @@ -8676,10 +8676,10 @@ def curvefit( this will be automatically determined by arguments of `func`. `param_names` should be manually supplied when fitting a function that takes a variable number of parameters. - allow_failures: bool, default: False - If True and the underlying `scipy.optimize_curve_fit` optimization fails for - any of the fits, return NaN in coefficients and covariances for those - coordinates. + errors : {"raise", "ignore"}, default: "raise" + If 'raise', any errors from the `scipy.optimize_curve_fit` optimization will + raise an exception. If 'ignore', the coefficients and covariances for the + coordinates where the fitting failed will be NaN. **kwargs : optional Additional keyword arguments to passed to scipy curve_fit. @@ -8762,6 +8762,9 @@ def curvefit( f"dimensions {preserved_dims}." ) + if errors not in ["raise", "ignore"]: + raise ValueError('errors must be either "raise" or "ignore"') + # Broadcast all coords with each other coords_ = broadcast(*coords_) coords_ = [ @@ -8802,7 +8805,7 @@ def _wrapper(Y, *args, **kwargs): try: popt, pcov = curve_fit(func, x, y, p0=p0_, bounds=(lb, ub), **kwargs) except RuntimeError: - if not allow_failures: + if errors == "raise": raise popt = np.full([n_params], np.nan) pcov = np.full([n_params, n_params], np.nan) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c183a70fc24..368f518abf2 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4573,7 +4573,7 @@ def sine(t, a, f, p): @requires_scipy @pytest.mark.parametrize("use_dask", [True, False]) - def test_curvefit_allow_failures(self, use_dask: bool) -> None: + def test_curvefit_ignore_errors(self, use_dask: bool) -> None: if use_dask and not has_dask: pytest.skip("requires dask") @@ -4606,7 +4606,7 @@ def line(x, a, b): fit = da.curvefit( coords="x", func=line, - allow_failures=True, + errors="ignore", # limit maximum number of calls so the optimization fails kwargs=dict(maxfev=5), ) From f61cb4f45052e6ad5a98ec336b87223246d9f2b7 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 21:26:53 +0300 Subject: [PATCH 09/11] Compute array so test passes also with dask --- xarray/tests/test_dataarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 368f518abf2..68a4047b0ae 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4601,7 +4601,7 @@ def line(x, a, b): func=line, # limit maximum number of calls so the optimization fails kwargs=dict(maxfev=5), - ) + ).compute() # have to compute to raise the error fit = da.curvefit( coords="x", @@ -4609,7 +4609,7 @@ def line(x, a, b): errors="ignore", # limit maximum number of calls so the optimization fails kwargs=dict(maxfev=5), - ) + ).compute() assert_allclose(fit.curvefit_coefficients, expected) From d8112347c7d09687b442591a70a6db22cfb8280b Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 21:39:17 +0300 Subject: [PATCH 10/11] Check error message --- xarray/tests/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 68a4047b0ae..de013c684cb 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4595,7 +4595,7 @@ def line(x, a, b): [[2, 1], [np.nan, np.nan]], coords={"i": [1, 2], "param": ["a", "b"]} ) - with pytest.raises(RuntimeError): + with pytest.raises(RuntimeError, match="calls to function has reached maxfev"): da.curvefit( coords="x", func=line, From 9a19f6a5d50b6509b8a0acb4c87f6b86edde66f8 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sun, 4 Jun 2023 21:50:37 +0300 Subject: [PATCH 11/11] Update whats-new --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cc447c11cb6..f72c8a6fd5c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,7 +25,7 @@ New Features - Added support for multidimensional initial guess and bounds in :py:meth:`DataArray.curvefit` (:issue:`7768`, :pull:`7821`). By `András Gunyhó `_. -- Add a ``allow_failures`` flag to :py:meth:`Dataset.curve_fit` that allows +- Add an ``errors`` option to :py:meth:`Dataset.curve_fit` that allows returning NaN for the parameters and covariances of failed fits, rather than failing the whole series of fits (:issue:`6317`, :pull:`7891`). By `Dominik Stańczak `_ and `András Gunyhó `_.