@@ -70,39 +70,71 @@ def get_writer(engine_name):
70
70
71
71
72
72
class BaseFile (object ):
73
- """ Class for identifying the type of reader
73
+ """Base class for excel readers
74
+
75
+ A file class can be initialized even if the engine is not installed.
76
+ If the engine is not installed, io_class and _open_workbook are
77
+ type(None) and None respectively. When attempting to use open_workbook
78
+ while _open_workbook is None, the relevant ImportError is raised. When
79
+ calling is_type or is_ext, False is returned when the reader is not
80
+ installed.
74
81
"""
75
82
# added in order to deal with both Open Document Format and MS Excel
76
83
# file types, see PR #9070
77
84
78
- def __init__ (self , engine , extensions , io_class , open_workbook ,
79
- try_engine = False ):
85
+ def __init__ (self , engine , extensions ):
86
+ """Set the engine name, and extension. If the engine is not installed,
87
+ io_class and _open_workbook are type(None) and None respectively,
88
+ but the ImportError is ignored at the init stage.
89
+ """
80
90
self .engine = engine
81
91
self .extensions = extensions
82
- self .io_class = io_class
83
- self .open_workbook = open_workbook
84
- if try_engine :
92
+ self .io_class = type (None )
93
+ self ._open_workbook = None
94
+ # fail silently if engine is not present on init, only raise when
95
+ # attempting to use it when opening a workbook
96
+ try :
97
+ self .load_engine ()
98
+ except ImportError :
99
+ pass
100
+
101
+ def open_workbook (self , * args , ** kwargs ):
102
+ """Explicitly load the engine again (and trigger an ImportError in the
103
+ process) if _open_workbook is set to None.
104
+ """
105
+ # try to load the engine again and raise import error if required
106
+ if isinstance (self ._open_workbook , type (None )):
85
107
self .load_engine ()
108
+ # just in case the user passes an already opened workbook of io_class
109
+ if isinstance (args [0 ], self .io_class ):
110
+ return args [0 ]
111
+ return self ._open_workbook (* args , ** kwargs )
86
112
87
113
def is_ext (self , path ):
88
- """Verify if the path's extension is supported by the reader
114
+ """Verify if the path's extension is supported by the reader. If the
115
+ reader is not available, return False
89
116
"""
90
117
ext = path .split ('.' )[- 1 ]
91
- if ext in self .extensions :
118
+ if isinstance (self .io_class , type (None )):
119
+ return False
120
+ elif ext in self .extensions :
92
121
return True
93
122
else :
94
123
return False
95
124
96
125
def is_type (self , io ):
97
- """Verify if the io type is supported by the reader
126
+ """Verify if the io type is supported by the reader. If the reader is
127
+ not installed, return False.
98
128
"""
99
- if isinstance (io , self .io_class ):
129
+ if isinstance (io , type (None )):
130
+ return False
131
+ elif isinstance (io , self .io_class ):
100
132
return True
101
133
else :
102
134
return False
103
135
104
136
def load_engine (self ):
105
- """Load the engine if installed
137
+ """Load the engine if installed, but raise ImportError otherwise.
106
138
"""
107
139
try :
108
140
self ._load_engine ()
@@ -114,12 +146,13 @@ def load_engine(self):
114
146
115
147
116
148
class XLRDFile (BaseFile ):
117
-
118
- def __init__ (self , try_engine = False ):
149
+ """File reader class for MS Excel spreadsheets (depends on xlrd)
150
+ """
151
+ def __init__ (self ):
119
152
# engine, extensions, are defined here, but io_class and open_workbook
120
153
# are only defined when importing the engine
121
- args = ('xlrd' , ['xls' , 'xlsx' , 'xlsm' ], type ( None ), None )
122
- super (XLRDFile , self ).__init__ (* args , try_engine = try_engine )
154
+ args = ('xlrd' , ['xls' , 'xlsx' , 'xlsm' ])
155
+ super (XLRDFile , self ).__init__ (* args )
123
156
124
157
def _load_engine (self ):
125
158
import xlrd # throw an ImportError if we need to
@@ -128,21 +161,22 @@ def _load_engine(self):
128
161
raise ImportError ("pandas requires xlrd >= 0.9.0 for excel "
129
162
"support, current version " + xlrd .__VERSION__ )
130
163
else :
131
- self .open_workbook = xlrd .open_workbook
164
+ self ._open_workbook = xlrd .open_workbook
132
165
self .io_class = xlrd .Book
133
166
134
167
135
168
class EZODFFile (BaseFile ):
136
-
137
- def __init__ (self , try_engine = False ):
169
+ """File reader class for ODF spreadsheets (depends on ezodf)
170
+ """
171
+ def __init__ (self ):
138
172
# engine, extensions, are defined here, but io_class and open_workbook
139
173
# are only defined when importing the engine
140
- args = ('ezodf' , ['ods' ], type ( None ), None )
141
- super (EZODFFile , self ).__init__ (* args , try_engine = try_engine )
174
+ args = ('ezodf' , ['ods' ])
175
+ super (EZODFFile , self ).__init__ (* args )
142
176
143
177
def _load_engine (self ):
144
- import ezodf
145
- self .open_workbook = ezodf .opendoc
178
+ import ezodf # throw an ImportError if we need to
179
+ self ._open_workbook = ezodf .opendoc
146
180
self .io_class = ezodf .document .PackagedDocument
147
181
148
182
@@ -250,44 +284,42 @@ def __init__(self, io, **kwds):
250
284
self .io = io
251
285
252
286
self .engine = kwds .pop ('engine' , None )
253
- # when the engine is not installed, do not throw import error
254
- xlrd_f = XLRDFile (try_engine = True )
255
- ezodf_f = EZODFFile (try_engine = True )
256
-
257
- if self .engine is None :
258
- for f_typ in [xlrd_f , ezodf_f ]:
259
- # derive engine from file extension if io is a path/url
260
- if isinstance (io , compat .string_types ):
261
- if f_typ .is_ext (io ):
262
- self .engine = f_typ .engine
263
- if _is_url (io ):
264
- data = _urlopen (io ).read ()
265
- self .book = f_typ .open_workbook (file_contents = data )
266
- else :
267
- self .book = f_typ .open_workbook (io )
268
- return
269
- # does the io type match any available reader types?
270
- elif isinstance (io , f_typ .io_class ):
287
+ # Initiate pandas supported file readers
288
+ xlrd_f = XLRDFile ()
289
+ ezodf_f = EZODFFile ()
290
+ supperted_f_types = [xlrd_f , ezodf_f ]
291
+
292
+ for f_typ in supperted_f_types :
293
+ if self .engine == f_typ .engine :
294
+ self .book = f_typ .open_workbook (io )
295
+ return
296
+ # derive engine from file extension if io is a path/url
297
+ elif isinstance (io , compat .string_types ):
298
+ if f_typ .is_ext (io ):
271
299
self .engine = f_typ .engine
272
- self .book = io
300
+ if _is_url (io ):
301
+ data = _urlopen (io ).read ()
302
+ # open_workbook: BaseFile will raise ImportError
303
+ # when the engine is not installed
304
+ self .book = f_typ .open_workbook (file_contents = data )
305
+ else :
306
+ self .book = f_typ .open_workbook (io )
273
307
return
274
-
275
- if self .engine == xlrd_f .engine and isinstance (io , xlrd_f .io_class ):
276
- # force import error when necessary
277
- import xlrd
278
- self .book = io
279
- elif not isinstance (io , xlrd_f .io_class ) and hasattr (io , "read" ):
308
+ # Does the io type match any supported reader types?
309
+ # Note that io_class is type(None) if engine is not installed
310
+ elif isinstance (io , f_typ .io_class ):
311
+ self .engine = f_typ .engine
312
+ self .book = io
313
+ return
314
+
315
+ # # xlrd has some additional/alternative reading mechanisms:
316
+ # if self.engine == xlrd_f.engine and isinstance(io, xlrd_f.io_class):
317
+ # self.book = io
318
+ if not isinstance (io , xlrd_f .io_class ) and hasattr (io , "read" ):
280
319
# N.B. xlrd.Book has a read attribute too
281
320
data = io .read ()
282
- import xlrd
283
- self .book = xlrd .open_workbook (file_contents = data )
284
- self .engine = 'xlrd'
285
- elif self .engine == xlrd_f .engine :
286
- self .book = xlrd_f .open_workbook (io )
287
- elif self .engine == ezodf_f .engine :
288
- # force import error when necessary
289
- import ezodf
290
- self .book = ezodf_f .open_workbook (io )
321
+ self .book = xlrd_f .open_workbook (file_contents = data )
322
+ self .engine = xlrd_f .engine
291
323
else :
292
324
raise ValueError ('Must explicitly set engine if not passing in'
293
325
' buffer or path for io.' )
0 commit comments