Skip to content

Commit 134c058

Browse files
committed
moving more reader choice logic away from ExcelFile class to BaseFile
1 parent dba3d37 commit 134c058

File tree

1 file changed

+88
-56
lines changed

1 file changed

+88
-56
lines changed

pandas/io/excel.py

+88-56
Original file line numberDiff line numberDiff line change
@@ -70,39 +70,71 @@ def get_writer(engine_name):
7070

7171

7272
class BaseFile(object):
73-
""" Class for identifying the type of reader
73+
"""Base class for excel readers
74+
75+
A file class can be initialized even if the engine is not installed.
76+
If the engine is not installed, io_class and _open_workbook are
77+
type(None) and None respectively. When attempting to use open_workbook
78+
while _open_workbook is None, the relevant ImportError is raised. When
79+
calling is_type or is_ext, False is returned when the reader is not
80+
installed.
7481
"""
7582
# added in order to deal with both Open Document Format and MS Excel
7683
# file types, see PR #9070
7784

78-
def __init__(self, engine, extensions, io_class, open_workbook,
79-
try_engine=False):
85+
def __init__(self, engine, extensions):
86+
"""Set the engine name, and extension. If the engine is not installed,
87+
io_class and _open_workbook are type(None) and None respectively,
88+
but the ImportError is ignored at the init stage.
89+
"""
8090
self.engine = engine
8191
self.extensions = extensions
82-
self.io_class = io_class
83-
self.open_workbook = open_workbook
84-
if try_engine:
92+
self.io_class = type(None)
93+
self._open_workbook = None
94+
# fail silently if engine is not present on init, only raise when
95+
# attempting to use it when opening a workbook
96+
try:
97+
self.load_engine()
98+
except ImportError:
99+
pass
100+
101+
def open_workbook(self, *args, **kwargs):
102+
"""Explicitly load the engine again (and trigger an ImportError in the
103+
process) if _open_workbook is set to None.
104+
"""
105+
# try to load the engine again and raise import error if required
106+
if isinstance(self._open_workbook, type(None)):
85107
self.load_engine()
108+
# just in case the user passes an already opened workbook of io_class
109+
if isinstance(args[0], self.io_class):
110+
return args[0]
111+
return self._open_workbook(*args, **kwargs)
86112

87113
def is_ext(self, path):
88-
"""Verify if the path's extension is supported by the reader
114+
"""Verify if the path's extension is supported by the reader. If the
115+
reader is not available, return False
89116
"""
90117
ext = path.split('.')[-1]
91-
if ext in self.extensions:
118+
if isinstance(self.io_class, type(None)):
119+
return False
120+
elif ext in self.extensions:
92121
return True
93122
else:
94123
return False
95124

96125
def is_type(self, io):
97-
"""Verify if the io type is supported by the reader
126+
"""Verify if the io type is supported by the reader. If the reader is
127+
not installed, return False.
98128
"""
99-
if isinstance(io, self.io_class):
129+
if isinstance(io, type(None)):
130+
return False
131+
elif isinstance(io, self.io_class):
100132
return True
101133
else:
102134
return False
103135

104136
def load_engine(self):
105-
"""Load the engine if installed
137+
"""Load the engine if installed, but raise ImportError otherwise.
106138
"""
107139
try:
108140
self._load_engine()
@@ -114,12 +146,13 @@ def load_engine(self):
114146

115147

116148
class XLRDFile(BaseFile):
117-
118-
def __init__(self, try_engine=False):
149+
"""File reader class for MS Excel spreadsheets (depends on xlrd)
150+
"""
151+
def __init__(self):
119152
# engine, extensions, are defined here, but io_class and open_workbook
120153
# are only defined when importing the engine
121-
args = ('xlrd', ['xls', 'xlsx', 'xlsm'], type(None), None)
122-
super(XLRDFile, self).__init__(*args, try_engine=try_engine)
154+
args = ('xlrd', ['xls', 'xlsx', 'xlsm'])
155+
super(XLRDFile, self).__init__(*args)
123156

124157
def _load_engine(self):
125158
import xlrd # throw an ImportError if we need to
@@ -128,21 +161,22 @@ def _load_engine(self):
128161
raise ImportError("pandas requires xlrd >= 0.9.0 for excel "
129162
"support, current version " + xlrd.__VERSION__)
130163
else:
131-
self.open_workbook = xlrd.open_workbook
164+
self._open_workbook = xlrd.open_workbook
132165
self.io_class = xlrd.Book
133166

134167

135168
class EZODFFile(BaseFile):
136-
137-
def __init__(self, try_engine=False):
169+
"""File reader class for ODF spreadsheets (depends on ezodf)
170+
"""
171+
def __init__(self):
138172
# engine, extensions, are defined here, but io_class and open_workbook
139173
# are only defined when importing the engine
140-
args = ('ezodf', ['ods'], type(None), None)
141-
super(EZODFFile, self).__init__(*args, try_engine=try_engine)
174+
args = ('ezodf', ['ods'])
175+
super(EZODFFile, self).__init__(*args)
142176

143177
def _load_engine(self):
144-
import ezodf
145-
self.open_workbook = ezodf.opendoc
178+
import ezodf # throw an ImportError if we need to
179+
self._open_workbook = ezodf.opendoc
146180
self.io_class = ezodf.document.PackagedDocument
147181

148182

@@ -250,44 +284,42 @@ def __init__(self, io, **kwds):
250284
self.io = io
251285

252286
self.engine = kwds.pop('engine', None)
253-
# when the engine is not installed, do not throw import error
254-
xlrd_f = XLRDFile(try_engine=True)
255-
ezodf_f = EZODFFile(try_engine=True)
256-
257-
if self.engine is None:
258-
for f_typ in [xlrd_f, ezodf_f]:
259-
# derive engine from file extension if io is a path/url
260-
if isinstance(io, compat.string_types):
261-
if f_typ.is_ext(io):
262-
self.engine = f_typ.engine
263-
if _is_url(io):
264-
data = _urlopen(io).read()
265-
self.book = f_typ.open_workbook(file_contents=data)
266-
else:
267-
self.book = f_typ.open_workbook(io)
268-
return
269-
# does the io type match any available reader types?
270-
elif isinstance(io, f_typ.io_class):
287+
# Initiate pandas supported file readers
288+
xlrd_f = XLRDFile()
289+
ezodf_f = EZODFFile()
290+
supperted_f_types = [xlrd_f, ezodf_f]
291+
292+
for f_typ in supperted_f_types:
293+
if self.engine == f_typ.engine:
294+
self.book = f_typ.open_workbook(io)
295+
return
296+
# derive engine from file extension if io is a path/url
297+
elif isinstance(io, compat.string_types):
298+
if f_typ.is_ext(io):
271299
self.engine = f_typ.engine
272-
self.book = io
300+
if _is_url(io):
301+
data = _urlopen(io).read()
302+
# open_workbook: BaseFile will raise ImportError
303+
# when the engine is not installed
304+
self.book = f_typ.open_workbook(file_contents=data)
305+
else:
306+
self.book = f_typ.open_workbook(io)
273307
return
274-
275-
if self.engine == xlrd_f.engine and isinstance(io, xlrd_f.io_class):
276-
# force import error when necessary
277-
import xlrd
278-
self.book = io
279-
elif not isinstance(io, xlrd_f.io_class) and hasattr(io, "read"):
308+
# Does the io type match any supported reader types?
309+
# Note that io_class is type(None) if engine is not installed
310+
elif isinstance(io, f_typ.io_class):
311+
self.engine = f_typ.engine
312+
self.book = io
313+
return
314+
315+
# # xlrd has some additional/alternative reading mechanisms:
316+
# if self.engine == xlrd_f.engine and isinstance(io, xlrd_f.io_class):
317+
# self.book = io
318+
if not isinstance(io, xlrd_f.io_class) and hasattr(io, "read"):
280319
# N.B. xlrd.Book has a read attribute too
281320
data = io.read()
282-
import xlrd
283-
self.book = xlrd.open_workbook(file_contents=data)
284-
self.engine = 'xlrd'
285-
elif self.engine == xlrd_f.engine:
286-
self.book = xlrd_f.open_workbook(io)
287-
elif self.engine == ezodf_f.engine:
288-
# force import error when necessary
289-
import ezodf
290-
self.book = ezodf_f.open_workbook(io)
321+
self.book = xlrd_f.open_workbook(file_contents=data)
322+
self.engine = xlrd_f.engine
291323
else:
292324
raise ValueError('Must explicitly set engine if not passing in'
293325
' buffer or path for io.')

0 commit comments

Comments
 (0)