From 5c3a73fb5ae9ffc617c5b593ba69e2ae8a73cf68 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sat, 3 Nov 2018 17:15:32 +0000 Subject: [PATCH 1/4] putting up for discussion: stop loading tutorial data by default --- doc/whats-new.rst | 5 +++++ xarray/tests/test_tutorial.py | 2 +- xarray/tutorial.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 61d6eda4333..eda8f530749 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -70,6 +70,11 @@ Breaking changes should significantly improve performance when reading and writing netCDF files with Dask, especially when working with many files or using Dask Distributed. By `Stephan Hoyer <https://github.com/shoyer>`_ +- Tutorial data is now loaded lazily. Previous behavior of + :py:meth:`xarray.tutorial.load_dataset` would call `Dataset.load()` prior + to returning. This was changed in order to facilitate using this data with + dask. + By `Joe Hamman <https://github.com/jhamman>`_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/tests/test_tutorial.py b/xarray/tests/test_tutorial.py index 083ec5ee72f..7ff44d684e4 100644 --- a/xarray/tests/test_tutorial.py +++ b/xarray/tests/test_tutorial.py @@ -23,6 +23,6 @@ def setUp(self): os.remove('{}.md5'.format(self.testfilepath)) def test_download_from_github(self): - ds = tutorial.load_dataset(self.testfile) + ds = tutorial.load_dataset(self.testfile).load() tiny = DataArray(range(5), name='tiny').to_dataset() assert_identical(ds, tiny) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 83a8317f42b..ce28e3fd51d 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -77,7 +77,7 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, """ raise IOError(msg) - ds = _open_dataset(localfile, **kws).load() + ds = _open_dataset(localfile, **kws) if not cache: _os.remove(localfile) From abcaf5176ec4531dcea05222d6a56ec57606f622 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 4 Nov 2018 13:47:29 -0800 Subject: [PATCH 2/4] add tutorial.open_dataset 
--- doc/indexing.rst | 2 +- doc/interpolation.rst | 2 +- doc/plotting.rst | 4 ++-- xarray/tests/test_tutorial.py | 4 ++-- xarray/tutorial.py | 24 +++++++++++++++++++++++- 5 files changed, 29 insertions(+), 7 deletions(-) diff --git a/doc/indexing.rst b/doc/indexing.rst index c05bf9994fc..3878d983cf6 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -411,7 +411,7 @@ can use indexing with ``.loc`` : .. ipython:: python - ds = xr.tutorial.load_dataset('air_temperature') + ds = xr.tutorial.open_dataset('air_temperature') #add an empty 2D dataarray ds['empty']= xr.full_like(ds.air.mean('time'),fill_value=0) diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 10e46331d0a..71e88079676 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -262,7 +262,7 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. .. ipython:: python # Raw data - ds = xr.tutorial.load_dataset('air_temperature').isel(time=0) + ds = xr.tutorial.open_dataset('air_temperature').isel(time=0) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) axes[0].set_title('Raw data') diff --git a/doc/plotting.rst b/doc/plotting.rst index 95e63cbff05..f8ba82febb0 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -60,7 +60,7 @@ For these examples we'll use the North American air temperature dataset. .. ipython:: python - airtemps = xr.tutorial.load_dataset('air_temperature') + airtemps = xr.tutorial.open_dataset('air_temperature') airtemps # Convert to celsius @@ -585,7 +585,7 @@ This script will plot the air temperature on a map. .. 
ipython:: python import cartopy.crs as ccrs - air = xr.tutorial.load_dataset('air_temperature').air + air = xr.tutorial.open_dataset('air_temperature').air ax = plt.axes(projection=ccrs.Orthographic(-80, 35)) air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()); @savefig plotting_maps_cartopy.png width=100% diff --git a/xarray/tests/test_tutorial.py b/xarray/tests/test_tutorial.py index 7ff44d684e4..dfa2a620366 100644 --- a/xarray/tests/test_tutorial.py +++ b/xarray/tests/test_tutorial.py @@ -23,6 +23,6 @@ def setUp(self): os.remove('{}.md5'.format(self.testfilepath)) def test_download_from_github(self): - ds = tutorial.load_dataset(self.testfile).load() + ds = tutorial.open_dataset(self.testfile).load() tiny = DataArray(range(5), name='tiny').to_dataset() - assert_identical(ds, tiny) + assert_identical(ds, tiny) \ No newline at end of file diff --git a/xarray/tutorial.py b/xarray/tutorial.py index ce28e3fd51d..8b9b814a4a9 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -9,6 +9,7 @@ import hashlib import os as _os +import warnings from .backends.api import open_dataset as _open_dataset from .core.pycompat import urlretrieve as _urlretrieve @@ -24,7 +25,7 @@ def file_md5_checksum(fname): # idea borrowed from Seaborn -def load_dataset(name, cache=True, cache_dir=_default_cache_dir, +def open_dataset(name, cache=True, cache_dir=_default_cache_dir, github_url='https://github.com/pydata/xarray-data', branch='master', **kws): """ @@ -48,6 +49,10 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, kws : dict, optional Passed to xarray.open_dataset + See Also + -------- + xarray.open_dataset + """ longdir = _os.path.expanduser(cache_dir) fullname = name + '.nc' @@ -83,3 +88,20 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir, _os.remove(localfile) return ds + + +def load_dataset(*args, **kwargs): + """ + `load_dataset` will be removed in version 0.12. 
The current behavior of + this function can be achieved by using `tutorial.open_dataset(...).load()`. + + See Also + -------- + open_dataset + """ + warnings.warn( + "load_dataset` will be removed in version 0.12. The current behavior " + "of this function can be achived by using " + "`tutorial.open_dataset(...).load()`." + FutureWarning, stacklevel=2) + return open_dataset(*args, **kwargs).load() From 75de32be71c395d391a03eb3df26cc7a470b0f52 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Sun, 4 Nov 2018 14:06:21 -0800 Subject: [PATCH 3/4] fix typo --- xarray/tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 8b9b814a4a9..ebc25a3c2aa 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -102,6 +102,6 @@ def load_dataset(*args, **kwargs): warnings.warn( "load_dataset` will be removed in version 0.12. The current behavior " "of this function can be achived by using " - "`tutorial.open_dataset(...).load()`." + "`tutorial.open_dataset(...).load()`.", FutureWarning, stacklevel=2) return open_dataset(*args, **kwargs).load() From 8d7c25b09d8fff31af26c03c3c0a3edd0dfcb36c Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 5 Nov 2018 06:17:19 -0800 Subject: [PATCH 4/4] add test for cached tutorial data and minor doc fixes --- xarray/tests/test_tutorial.py | 7 ++++++- xarray/tutorial.py | 7 ++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_tutorial.py b/xarray/tests/test_tutorial.py index dfa2a620366..6547311aa2f 100644 --- a/xarray/tests/test_tutorial.py +++ b/xarray/tests/test_tutorial.py @@ -25,4 +25,9 @@ def setUp(self): def test_download_from_github(self): ds = tutorial.open_dataset(self.testfile).load() tiny = DataArray(range(5), name='tiny').to_dataset() - assert_identical(ds, tiny) \ No newline at end of file + assert_identical(ds, tiny) + + def test_download_from_github_load_without_cache(self): + ds_nocache = 
tutorial.open_dataset(self.testfile, cache=False).load() + ds_cache = tutorial.open_dataset(self.testfile).load() + assert_identical(ds_cache, ds_nocache) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index ebc25a3c2aa..064eed330cc 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -85,6 +85,7 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir, ds = _open_dataset(localfile, **kws) if not cache: + ds = ds.load() _os.remove(localfile) return ds @@ -100,8 +101,8 @@ def load_dataset(*args, **kwargs): open_dataset """ warnings.warn( - "load_dataset` will be removed in version 0.12. The current behavior " - "of this function can be achived by using " + "load_dataset` will be removed in xarray version 0.12. The current " + "behavior of this function can be achived by using " "`tutorial.open_dataset(...).load()`.", - FutureWarning, stacklevel=2) + DeprecationWarning, stacklevel=2) return open_dataset(*args, **kwargs).load()