diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index b156297816b..1387466b702 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -10,7 +10,6 @@ dependencies: - bottleneck - cartopy - cdms2 - - cfgrib - cftime - coveralls - flox diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index aea44865dcd..2d35ab8724b 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -7,7 +7,6 @@ dependencies: - python=3.10 - bottleneck - cartopy - - cfgrib>=0.9 - dask-core>=2022.1 - h5netcdf>=0.13 - ipykernel diff --git a/ci/requirements/environment-py311.yml b/ci/requirements/environment-py311.yml index 321cd4bb010..cd9edbb5052 100644 --- a/ci/requirements/environment-py311.yml +++ b/ci/requirements/environment-py311.yml @@ -8,7 +8,6 @@ dependencies: - bottleneck - cartopy # - cdms2 - - cfgrib - cftime - dask-core - distributed diff --git a/ci/requirements/environment-windows-py311.yml b/ci/requirements/environment-windows-py311.yml index 321611065a4..effef0d7961 100644 --- a/ci/requirements/environment-windows-py311.yml +++ b/ci/requirements/environment-windows-py311.yml @@ -6,7 +6,6 @@ dependencies: - bottleneck - cartopy # - cdms2 # Not available on Windows - # - cfgrib # Causes Python interpreter crash on Windows: https://github.com/pydata/xarray/pull/3340 - cftime - dask-core - distributed diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index c876887df73..c02907b24ac 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -6,7 +6,6 @@ dependencies: - bottleneck - cartopy # - cdms2 # Not available on Windows - # - cfgrib # Causes Python interpreter crash on Windows: https://github.com/pydata/xarray/pull/3340 - cftime - dask-core - distributed diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index cd82ccf4f06..9abe1b295a2 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -8,7 +8,6 @@ dependencies: - bottleneck - cartopy - cdms2 - - cfgrib - cftime - dask-core - distributed diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index 036a159675e..e50d08264b8 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -12,7 +12,6 @@ dependencies: - bottleneck=1.3 - cartopy=0.20 - cdms2=3.1 - - cfgrib=0.9 - cftime=1.5 - coveralls - dask-core=2022.1 diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index 5aa7fc0cee4..6b3283adcbd 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -45,8 +45,6 @@ For netCDF and IO other gridded raster datasets. - `iris `__: for conversion to and from iris' Cube objects -- `cfgrib `__: for reading GRIB files via the - *ECMWF ecCodes* library. For accelerating xarray ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 5610e7829f2..d5de181f562 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -1257,7 +1257,7 @@ GRIB format via cfgrib Xarray supports reading GRIB files via ECMWF cfgrib_ python driver, if it is installed. To open a GRIB file supply ``engine='cfgrib'`` -to :py:func:`open_dataset`: +to :py:func:`open_dataset` after installing cfgrib_: .. ipython:: :verbatim: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6bfdf0b6f0a..cbc84600d50 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,6 +45,10 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Remove internal support for reading GRIB files through the ``cfgrib`` backend. ``cfgrib`` now uses the external + backend interface, so no existing code should break. + By `Deepak Cherian `_. + .. _whats-new.2023.03.0: v2023.03.0 (March 22, 2023) diff --git a/setup.cfg b/setup.cfg index 43512144386..5d5cf161195 100644 --- a/setup.cfg +++ b/setup.cfg @@ -89,7 +89,6 @@ io = fsspec cftime rasterio - cfgrib pooch ## Scitools packages & dependencies (e.g: cartopy, cf-units) can be hard to install # scitools-iris diff --git a/xarray/backends/__init__.py b/xarray/backends/__init__.py index 302d89335dd..ca0b8fe4e6b 100644 --- a/xarray/backends/__init__.py +++ b/xarray/backends/__init__.py @@ -3,7 +3,6 @@ DataStores provide a uniform interface for saving and loading data in different formats. They should not be used directly, but rather through Dataset objects. """ -from xarray.backends.cfgrib_ import CfGribDataStore from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint from xarray.backends.file_manager import ( CachingFileManager, @@ -30,7 +29,6 @@ "BackendEntrypoint", "FileManager", "CachingFileManager", - "CfGribDataStore", "DummyFileManager", "InMemoryDataStore", "NetCDF4DataStore", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 8891ac2986b..e5adedbb576 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -43,7 +43,7 @@ T_NetcdfEngine = Literal["netcdf4", "scipy", "h5netcdf"] T_Engine = Union[ T_NetcdfEngine, - Literal["pydap", "pynio", "pseudonetcdf", "cfgrib", "zarr"], + Literal["pydap", "pynio", "pseudonetcdf", "zarr"], type[BackendEntrypoint], str, # no nice typing support for custom backends None, @@ -64,7 +64,6 @@ "h5netcdf": backends.H5NetCDFStore.open, "pynio": backends.NioDataStore, "pseudonetcdf": backends.PseudoNetCDFDataStore.open, - "cfgrib": backends.CfGribDataStore, "zarr": backends.ZarrStore.open_group, } @@ -387,7 +386,7 @@ def open_dataset( ends with .gz, in which case the file is gunzipped and opened with scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). - engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ + engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \ "pseudonetcdf", "zarr", None}, installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine @@ -479,7 +478,7 @@ def open_dataset( relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the currently active dask scheduler. Supported by "netcdf4", "h5netcdf", - "scipy", "pynio", "pseudonetcdf", "cfgrib". + "scipy", "pynio", "pseudonetcdf". See engine open function for kwargs accepted by each specific engine. @@ -576,7 +575,7 @@ def open_dataarray( ends with .gz, in which case the file is gunzipped and opened with scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). - engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ + engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \ "pseudonetcdf", "zarr", None}, installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine @@ -666,7 +665,7 @@ def open_dataarray( relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the currently active dask scheduler. Supported by "netcdf4", "h5netcdf", - "scipy", "pynio", "pseudonetcdf", "cfgrib". + "scipy", "pynio", "pseudonetcdf". See engine open function for kwargs accepted by each specific engine. @@ -803,7 +802,7 @@ def open_mfdataset( If provided, call this function on each dataset prior to concatenation. You can find the file-name from which each dataset was loaded in ``ds.encoding["source"]``. - engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ + engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \ "pseudonetcdf", "zarr", None}, installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py deleted file mode 100644 index 4c7d6a65e8f..00000000000 --- a/xarray/backends/cfgrib_.py +++ /dev/null @@ -1,148 +0,0 @@ -from __future__ import annotations - -import os -import warnings - -import numpy as np - -from xarray.backends.common import ( - BACKEND_ENTRYPOINTS, - AbstractDataStore, - BackendArray, - BackendEntrypoint, - _normalize_path, -) -from xarray.backends.locks import SerializableLock, ensure_lock -from xarray.backends.store import StoreBackendEntrypoint -from xarray.core import indexing -from xarray.core.utils import Frozen, FrozenDict, close_on_error, module_available -from xarray.core.variable import Variable - -# FIXME: Add a dedicated lock, even if ecCodes is supposed to be thread-safe -# in most circumstances. See: -# https://confluence.ecmwf.int/display/ECC/Frequently+Asked+Questions -ECCODES_LOCK = SerializableLock() - - -class CfGribArrayWrapper(BackendArray): - def __init__(self, datastore, array): - self.datastore = datastore - self.shape = array.shape - self.dtype = array.dtype - self.array = array - - def __getitem__(self, key): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem - ) - - def _getitem(self, key): - with self.datastore.lock: - return self.array[key] - - -class CfGribDataStore(AbstractDataStore): - """ - Implements the ``xr.AbstractDataStore`` read-only API for a GRIB file. - """ - - def __init__(self, filename, lock=None, **backend_kwargs): - try: - import cfgrib - # cfgrib throws a RuntimeError if eccodes is not installed - except (ImportError, RuntimeError) as err: - warnings.warn( - "Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. " - "Try `import cfgrib` to get the full error message" - ) - raise err - - if lock is None: - lock = ECCODES_LOCK - self.lock = ensure_lock(lock) - self.ds = cfgrib.open_file(filename, **backend_kwargs) - - def open_store_variable(self, name, var): - if isinstance(var.data, np.ndarray): - data = var.data - else: - wrapped_array = CfGribArrayWrapper(self, var.data) - data = indexing.LazilyIndexedArray(wrapped_array) - - encoding = self.ds.encoding.copy() - encoding["original_shape"] = var.data.shape - - return Variable(var.dimensions, data, var.attributes, encoding) - - def get_variables(self): - return FrozenDict( - (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items() - ) - - def get_attrs(self): - return Frozen(self.ds.attributes) - - def get_dimensions(self): - return Frozen(self.ds.dimensions) - - def get_encoding(self): - dims = self.get_dimensions() - return {"unlimited_dims": {k for k, v in dims.items() if v is None}} - - -class CfgribfBackendEntrypoint(BackendEntrypoint): - available = module_available("cfgrib") - - def guess_can_open(self, filename_or_obj): - try: - _, ext = os.path.splitext(filename_or_obj) - except TypeError: - return False - return ext in {".grib", ".grib2", ".grb", ".grb2"} - - def open_dataset( - self, - filename_or_obj, - *, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - lock=None, - indexpath="{path}.{short_hash}.idx", - filter_by_keys={}, - read_keys=[], - encode_cf=("parameter", "time", "geography", "vertical"), - squeeze=True, - time_dims=("time", "step"), - ): - filename_or_obj = _normalize_path(filename_or_obj) - store = CfGribDataStore( - filename_or_obj, - indexpath=indexpath, - filter_by_keys=filter_by_keys, - read_keys=read_keys, - encode_cf=encode_cf, - squeeze=squeeze, - time_dims=time_dims, - lock=lock, - ) - store_entrypoint = StoreBackendEntrypoint() - with close_on_error(store): - ds = store_entrypoint.open_dataset( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) - return ds - - -BACKEND_ENTRYPOINTS["cfgrib"] = CfgribfBackendEntrypoint diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index be81bd18a58..864b3df8405 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -75,7 +75,6 @@ def _importorskip( has_zarr, requires_zarr = _importorskip("zarr") has_fsspec, requires_fsspec = _importorskip("fsspec") has_iris, requires_iris = _importorskip("iris") -has_cfgrib, requires_cfgrib = _importorskip("cfgrib") has_numbagg, requires_numbagg = _importorskip("numbagg") has_seaborn, requires_seaborn = _importorskip("seaborn") has_sparse, requires_sparse = _importorskip("sparse") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index bc6b095fc4e..7b5f94a6f5a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -63,7 +63,6 @@ has_scipy, mock, network, - requires_cfgrib, requires_cftime, requires_dask, requires_fsspec, @@ -4176,51 +4175,6 @@ def test_weakrefs(self) -> None: assert_identical(actual, expected) -@requires_cfgrib -class TestCfGrib: - def test_read(self) -> None: - expected = { - "number": 2, - "time": 3, - "isobaricInhPa": 2, - "latitude": 3, - "longitude": 4, - } - with open_example_dataset("example.grib", engine="cfgrib") as ds: - assert ds.dims == expected - assert list(ds.data_vars) == ["z", "t"] - assert ds["z"].min() == 12660.0 - - def test_read_filter_by_keys(self) -> None: - kwargs = {"filter_by_keys": {"shortName": "t"}} - expected = { - "number": 2, - "time": 3, - "isobaricInhPa": 2, - "latitude": 3, - "longitude": 4, - } - with open_example_dataset( - "example.grib", engine="cfgrib", backend_kwargs=kwargs - ) as ds: - assert ds.dims == expected - assert list(ds.data_vars) == ["t"] - assert ds["t"].min() == 231.0 - - def test_read_outer(self) -> None: - expected = { - "number": 2, - "time": 3, - "isobaricInhPa": 2, - "latitude": 2, - "longitude": 3, - } - with open_example_dataset("example.grib", engine="cfgrib") as ds: - res = ds.isel(latitude=[0, 2], longitude=[0, 1, 2]) - assert res.dims == expected - assert res["t"].min() == 231.0 - - @requires_pseudonetcdf @pytest.mark.filterwarnings("ignore:IOAPI_ISPH is assumed to be 6370000") class TestPseudoNetCDFFormat: diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index e13667af73a..a29cccd0f50 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -35,7 +35,6 @@ has_h5netcdf, has_netCDF4, has_scipy, - requires_cfgrib, requires_cftime, requires_netCDF4, requires_rasterio, @@ -45,7 +44,6 @@ ON_WINDOWS, create_tmp_file, create_tmp_geotiff, - open_example_dataset, ) from xarray.tests.test_dataset import create_test_data @@ -210,20 +208,6 @@ def test_dask_distributed_rasterio_integration_test(loop) -> None: assert_allclose(actual, expected) -@requires_cfgrib -@pytest.mark.filterwarnings("ignore:deallocating CachingFileManager") -def test_dask_distributed_cfgrib_integration_test(loop) -> None: - with cluster() as (s, [a, b]): - with Client(s["address"], loop=loop): - with open_example_dataset( - "example.grib", engine="cfgrib", chunks={"time": 1} - ) as ds: - with open_example_dataset("example.grib", engine="cfgrib") as expected: - assert isinstance(ds["t"].data, da.Array) - actual = ds.compute() - assert_allclose(actual, expected) - - @pytest.mark.xfail( condition=Version(distributed.__version__) < Version("2022.02.0"), reason="https://github.com/dask/distributed/pull/5739", diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 2160b8d16ed..421be1df2dc 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -126,12 +126,12 @@ def test_set_missing_parameters_raise_error() -> None: ) def test_build_engines() -> None: dummy_pkg_entrypoint = EntryPoint( - "cfgrib", "xarray.tests.test_plugins:backend_1", "xarray_backends" + "dummy", "xarray.tests.test_plugins:backend_1", "xarray_backends" ) backend_entrypoints = plugins.build_engines([dummy_pkg_entrypoint]) - assert isinstance(backend_entrypoints["cfgrib"], DummyBackendEntrypoint1) - assert backend_entrypoints["cfgrib"].open_dataset_parameters == ( + assert isinstance(backend_entrypoints["dummy"], DummyBackendEntrypoint1) + assert backend_entrypoints["dummy"].open_dataset_parameters == ( "filename_or_obj", "decoder", ) @@ -192,7 +192,6 @@ def test_lazy_import() -> None: Only when running code for the first time that requires them. """ blacklisted = [ - # "cfgrib", # TODO: cfgrib has its own plugin now, deprecate? "h5netcdf", "netCDF4", "PseudoNetCDF", diff --git a/xarray/tutorial.py b/xarray/tutorial.py index fca22aaf5af..17fde8e3b92 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -153,6 +153,12 @@ def open_dataset( elif path.suffix == ".grib": if engine is None: engine = "cfgrib" + try: + import cfgrib # noqa + except ImportError as e: + raise ImportError( + "Reading this tutorial dataset requires the cfgrib package." + ) from e url = f"{base_url}/raw/{version}/{path.name}" diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index d470d9b0633..42ce3746942 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -109,7 +109,6 @@ def show_versions(file=sys.stdout): ("nc_time_axis", lambda mod: mod.__version__), ("PseudoNetCDF", lambda mod: mod.__version__), ("rasterio", lambda mod: mod.__version__), - ("cfgrib", lambda mod: mod.__version__), ("iris", lambda mod: mod.__version__), ("bottleneck", lambda mod: mod.__version__), ("dask", lambda mod: mod.__version__),