@@ -75,6 +75,34 @@ def _get_default_engine_netcdf():
     return engine


+def _get_engine_from_magic_number(filename_or_obj):
+    # check byte header to determine file type
+    if isinstance(filename_or_obj, bytes):
+        magic_number = filename_or_obj[:8]
+    else:
+        if filename_or_obj.tell() != 0:
+            raise ValueError("file-like object read/write pointer not at zero "
+                             "please close and reopen, or use a context "
+                             "manager")
+        magic_number = filename_or_obj.read(8)
+        filename_or_obj.seek(0)
+
+    if magic_number.startswith(b'CDF'):
+        engine = 'scipy'
+    elif magic_number.startswith(b'\211HDF\r\n\032\n'):
+        engine = 'h5netcdf'
+        if isinstance(filename_or_obj, bytes):
+            raise ValueError("can't open netCDF4/HDF5 as bytes "
+                             "try passing a path or file-like object")
+    else:
+        if isinstance(filename_or_obj, bytes) and len(filename_or_obj) > 80:
+            filename_or_obj = filename_or_obj[:80] + b'...'
+        raise ValueError('{} is not a valid netCDF file '
+                         'did you mean to pass a string for a path instead?'
+                         .format(filename_or_obj))
+    return engine
+
+
 def _get_default_engine(path, allow_remote=False):
     if allow_remote and is_remote_uri(path):
         engine = _get_default_engine_remote_uri()
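A minimal sketch of how the magic-number dispatch above behaves, assuming the helper is importable from xarray's backend module; the byte string below is a hypothetical header, not a complete file:

import io

fake_netcdf3_header = b'CDF\x01' + b'\x00' * 28   # classic netCDF3 files start with b'CDF'

_get_engine_from_magic_number(fake_netcdf3_header)              # -> 'scipy'
_get_engine_from_magic_number(io.BytesIO(fake_netcdf3_header))  # -> 'scipy', pointer reset to 0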
@@ -170,8 +198,8 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
         Strings and Path objects are interpreted as a path to a netCDF file
         or an OpenDAP URL and opened with python-netCDF4, unless the filename
         ends with .gz, in which case the file is gunzipped and opened with
-        scipy.io.netcdf (only netCDF3 supported). File-like objects are opened
-        with scipy.io.netcdf (only netCDF3 supported).
+        scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
+        objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
     group : str, optional
         Path to the netCDF4 group in the given file to open (only works for
         netCDF4 files).
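As the revised docstring suggests, an in-memory round trip now works end to end; a minimal sketch assuming scipy is installed (Dataset.to_netcdf() with no target returns netCDF3 bytes):

import xarray as xr

ds = xr.Dataset({'x': ('t', [1, 2, 3])})
raw = ds.to_netcdf()               # netCDF3 byte string written by the scipy backend
reopened = xr.open_dataset(raw)    # engine inferred from the leading b'CDF' magic number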
@@ -258,6 +286,13 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
     --------
     open_mfdataset
     """
+    engines = [None, 'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio',
+               'cfgrib', 'pseudonetcdf']
+    if engine not in engines:
+        raise ValueError('unrecognized engine for open_dataset: {}\n'
+                         'must be one of: {}'
+                         .format(engine, engines))
+
     if autoclose is not None:
         warnings.warn(
             'The autoclose argument is no longer used by '
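With the up-front check added here, a misspelled engine name fails fast with the full list of accepted values instead of surfacing later in backend dispatch; a sketch where 'data.nc' is a placeholder path:

import xarray as xr

try:
    xr.open_dataset('data.nc', engine='netCDF4')   # typo: should be 'netcdf4'
except ValueError as err:
    print(err)   # unrecognized engine for open_dataset: netCDF4 ... must be one of: [...]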
@@ -316,18 +351,9 @@ def maybe_decode_store(store, lock=False):

     if isinstance(filename_or_obj, backends.AbstractDataStore):
         store = filename_or_obj
-        ds = maybe_decode_store(store)
-    elif isinstance(filename_or_obj, str):

-        if (isinstance(filename_or_obj, bytes) and
-                filename_or_obj.startswith(b'\x89HDF')):
-            raise ValueError('cannot read netCDF4/HDF5 file images')
-        elif (isinstance(filename_or_obj, bytes) and
-                filename_or_obj.startswith(b'CDF')):
-            # netCDF3 file images are handled by scipy
-            pass
-        elif isinstance(filename_or_obj, str):
-            filename_or_obj = _normalize_path(filename_or_obj)
+    elif isinstance(filename_or_obj, str):
+        filename_or_obj = _normalize_path(filename_or_obj)

         if engine is None:
             engine = _get_default_engine(filename_or_obj,
@@ -352,18 +378,19 @@ def maybe_decode_store(store, lock=False):
         elif engine == 'cfgrib':
             store = backends.CfGribDataStore(
                 filename_or_obj, lock=lock, **backend_kwargs)
-        else:
-            raise ValueError('unrecognized engine for open_dataset: %r'
-                             % engine)

-        with close_on_error(store):
-            ds = maybe_decode_store(store)
     else:
-        if engine is not None and engine != 'scipy':
-            raise ValueError('can only read file-like objects with '
-                             "default engine or engine='scipy'")
-        # assume filename_or_obj is a file-like object
-        store = backends.ScipyDataStore(filename_or_obj)
+        if engine not in [None, 'scipy', 'h5netcdf']:
+            raise ValueError("can only read bytes or file-like objects "
+                             "with engine='scipy' or 'h5netcdf'")
+        engine = _get_engine_from_magic_number(filename_or_obj)
+        if engine == 'scipy':
+            store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
+        elif engine == 'h5netcdf':
+            store = backends.H5NetCDFStore(filename_or_obj, group=group,
+                                           lock=lock, **backend_kwargs)
+
+    with close_on_error(store):
         ds = maybe_decode_store(store)

     # Ensure source filename always stored in dataset object (GH issue #2550)
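The new else branch means netCDF4/HDF5 data can be read from an already-open file object via h5netcdf; a sketch assuming h5netcdf is installed and 'data.nc' is a hypothetical netCDF4 file, with .load() pulling values into memory before the file closes:

import xarray as xr

with open('data.nc', 'rb') as f:
    ds = xr.open_dataset(f, engine='h5netcdf').load()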
@@ -390,8 +417,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
         Strings and Paths are interpreted as a path to a netCDF file or an
         OpenDAP URL and opened with python-netCDF4, unless the filename ends
         with .gz, in which case the file is gunzipped and opened with
-        scipy.io.netcdf (only netCDF3 supported). File-like objects are opened
-        with scipy.io.netcdf (only netCDF3 supported).
+        scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like
+        objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF).
     group : str, optional
         Path to the netCDF4 group in the given file to open (only works for
         netCDF4 files).