Skip to content

Commit 55f21de

Browse files
author
Joe Hamman
authored
Stop loading tutorial data by default (#2538)
* putting up for discussion: stop loading tutorial data by default * add tutorial.open_dataset * fix typo * add test for cached tutorial data and minor doc fixes
1 parent 421be44 commit 55f21de

File tree

6 files changed

+40
-7
lines changed

6 files changed

+40
-7
lines changed

doc/indexing.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ can use indexing with ``.loc`` :
411411

412412
.. ipython:: python
413413
414-
ds = xr.tutorial.load_dataset('air_temperature')
414+
ds = xr.tutorial.open_dataset('air_temperature')
415415
416416
#add an empty 2D dataarray
417417
ds['empty']= xr.full_like(ds.air.mean('time'),fill_value=0)

doc/interpolation.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data.
262262
.. ipython:: python
263263
264264
# Raw data
265-
ds = xr.tutorial.load_dataset('air_temperature').isel(time=0)
265+
ds = xr.tutorial.open_dataset('air_temperature').isel(time=0)
266266
fig, axes = plt.subplots(ncols=2, figsize=(10, 4))
267267
ds.air.plot(ax=axes[0])
268268
axes[0].set_title('Raw data')

doc/plotting.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ For these examples we'll use the North American air temperature dataset.
6060

6161
.. ipython:: python
6262
63-
airtemps = xr.tutorial.load_dataset('air_temperature')
63+
airtemps = xr.tutorial.open_dataset('air_temperature')
6464
airtemps
6565
6666
# Convert to celsius
@@ -585,7 +585,7 @@ This script will plot the air temperature on a map.
585585
.. ipython:: python
586586
587587
import cartopy.crs as ccrs
588-
air = xr.tutorial.load_dataset('air_temperature').air
588+
air = xr.tutorial.open_dataset('air_temperature').air
589589
ax = plt.axes(projection=ccrs.Orthographic(-80, 35))
590590
air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree());
591591
@savefig plotting_maps_cartopy.png width=100%

doc/whats-new.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ Breaking changes
7575
should significantly improve performance when reading and writing
7676
netCDF files with Dask, especially when working with many files or using
7777
Dask Distributed. By `Stephan Hoyer <https://github.com/shoyer>`_
78+
- Tutorial data is now loaded lazily. Previous behavior of
79+
:py:meth:`xarray.tutorial.load_dataset` would call ``Dataset.load()`` prior
80+
to returning. This was changed in order to facilitate using this data with
81+
dask.
82+
By `Joe Hamman <https://github.com/jhamman>`_.
7883

7984
Documentation
8085
~~~~~~~~~~~~~

xarray/tests/test_tutorial.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ def setUp(self):
2323
os.remove('{}.md5'.format(self.testfilepath))
2424

2525
def test_download_from_github(self):
26-
ds = tutorial.load_dataset(self.testfile)
26+
ds = tutorial.open_dataset(self.testfile).load()
2727
tiny = DataArray(range(5), name='tiny').to_dataset()
2828
assert_identical(ds, tiny)
29+
30+
def test_download_from_github_load_without_cache(self):
31+
ds_nocache = tutorial.open_dataset(self.testfile, cache=False).load()
32+
ds_cache = tutorial.open_dataset(self.testfile).load()
33+
assert_identical(ds_cache, ds_nocache)

xarray/tutorial.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import hashlib
1111
import os as _os
12+
import warnings
1213

1314
from .backends.api import open_dataset as _open_dataset
1415
from .core.pycompat import urlretrieve as _urlretrieve
@@ -24,7 +25,7 @@ def file_md5_checksum(fname):
2425

2526

2627
# idea borrowed from Seaborn
27-
def load_dataset(name, cache=True, cache_dir=_default_cache_dir,
28+
def open_dataset(name, cache=True, cache_dir=_default_cache_dir,
2829
github_url='https://github.com/pydata/xarray-data',
2930
branch='master', **kws):
3031
"""
@@ -48,6 +49,10 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir,
4849
kws : dict, optional
4950
Passed to xarray.open_dataset
5051
52+
See Also
53+
--------
54+
xarray.open_dataset
55+
5156
"""
5257
longdir = _os.path.expanduser(cache_dir)
5358
fullname = name + '.nc'
@@ -77,9 +82,27 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir,
7782
"""
7883
raise IOError(msg)
7984

80-
ds = _open_dataset(localfile, **kws).load()
85+
ds = _open_dataset(localfile, **kws)
8186

8287
if not cache:
88+
ds = ds.load()
8389
_os.remove(localfile)
8490

8591
return ds
92+
93+
94+
def load_dataset(*args, **kwargs):
95+
"""
96+
`load_dataset` will be removed in version 0.12. The current behavior of
97+
this function can be achieved by using `tutorial.open_dataset(...).load()`.
98+
99+
See Also
100+
--------
101+
open_dataset
102+
"""
103+
warnings.warn(
104+
"load_dataset` will be removed in xarray version 0.12. The current "
105+
"behavior of this function can be achived by using "
106+
"`tutorial.open_dataset(...).load()`.",
107+
DeprecationWarning, stacklevel=2)
108+
return open_dataset(*args, **kwargs).load()

0 commit comments

Comments
 (0)