Skip to content
Merged
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ Other enhancements
- :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`)
- Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`)
- :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
- :func:`pandas.read_excel` can now auto-detect ``.xlsb`` files and defaults to the ``pyxlsb`` engine for them (:issue:`35416`)
- :func:`pandas.read_excel` now respects the ``io.excel.<ext>.reader`` options set via :func:`pandas.set_option` when no ``engine`` is passed (:issue:`34252`)

.. ---------------------------------------------------------------------------

Expand Down
10 changes: 5 additions & 5 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,15 +524,15 @@ def use_inf_as_na_cb(key):
"reader",
"auto",
reader_engine_doc.format(ext="xls", others=", ".join(_xls_options)),
validator=str,
validator=is_one_of_factory(_xls_options + ["auto"]),
)

with cf.config_prefix("io.excel.xlsm"):
cf.register_option(
"reader",
"auto",
reader_engine_doc.format(ext="xlsm", others=", ".join(_xlsm_options)),
validator=str,
validator=is_one_of_factory(_xlsm_options + ["auto"]),
)


Expand All @@ -541,7 +541,7 @@ def use_inf_as_na_cb(key):
"reader",
"auto",
reader_engine_doc.format(ext="xlsx", others=", ".join(_xlsx_options)),
validator=str,
validator=is_one_of_factory(_xlsx_options + ["auto"]),
)


Expand All @@ -550,15 +550,15 @@ def use_inf_as_na_cb(key):
"reader",
"auto",
reader_engine_doc.format(ext="ods", others=", ".join(_ods_options)),
validator=str,
validator=is_one_of_factory(_ods_options + ["auto"]),
)

with cf.config_prefix("io.excel.xlsb"):
cf.register_option(
"reader",
"auto",
reader_engine_doc.format(ext="xlsb", others=", ".join(_xlsb_options)),
validator=str,
validator=is_one_of_factory(_xlsb_options + ["auto"]),
)

# Set up the io.excel specific writer configuration.
Expand Down
40 changes: 22 additions & 18 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from pandas.io.common import IOHandles, get_handle, stringify_path, validate_header_arg
from pandas.io.excel._util import (
fill_mi_header,
get_default_writer,
get_default_engine,
get_writer,
maybe_convert_usecols,
pop_header_name,
Expand Down Expand Up @@ -123,6 +123,10 @@
then `odf <https://pypi.org/project/odfpy/>`_ will be used.
- Otherwise if ``path_or_buffer`` is in xls format,
``xlrd`` will be used.
- Otherwise if ``path_or_buffer`` is in xlsb format,
``pyxlsb`` will be used.

.. versionadded:: 1.3.0
- Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
then ``openpyxl`` will be used.
- Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised.
Expand Down Expand Up @@ -701,7 +705,7 @@ def __new__(cls, path, engine=None, **kwargs):
try:
engine = config.get_option(f"io.excel.{ext}.writer", silent=True)
if engine == "auto":
engine = get_default_writer(ext)
engine = get_default_engine(ext, mode="write")
except KeyError as err:
raise ValueError(f"No engine for filetype: '{ext}'") from err

Expand Down Expand Up @@ -1003,6 +1007,10 @@ class ExcelFile:
then `odf <https://pypi.org/project/odfpy/>`_ will be used.
- Otherwise if ``path_or_buffer`` is in xls format,
``xlrd`` will be used.
- Otherwise if ``path_or_buffer`` is in xlsb format,
``pyxlsb`` will be used.

.. versionadded:: 1.3.0
- Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
then ``openpyxl`` will be used.
- Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised.
Expand Down Expand Up @@ -1064,21 +1072,18 @@ def __init__(
)

if engine is None:
if ext == "ods":
engine = "odf"
elif ext == "xls":
engine = "xlrd"
else:
# GH 35029 - Prefer openpyxl except for xls files
if (
import_optional_dependency(
"openpyxl", raise_on_missing=False, on_version="ignore"
)
is not None
):
engine = "openpyxl"
else:
engine = "xlrd"
if (
import_optional_dependency(
"openpyxl", raise_on_missing=False, on_version="warn"
)
is None
and xlrd_version is not None
):
config.set_option("io.excel.xlsx.reader", "xlrd")
# ext will always be valid, otherwise inspect_excel_format would raise
engine = config.get_option(f"io.excel.{ext}.reader", silent=True)
if engine == "auto":
engine = get_default_engine(ext, mode="read")

if engine == "xlrd" and ext != "xls" and xlrd_version is not None:
if xlrd_version >= "2":
Expand Down Expand Up @@ -1106,7 +1111,6 @@ def __init__(
FutureWarning,
stacklevel=stacklevel,
)
assert engine in self._engines, f"Engine {engine} not recognized"

self.engine = engine
self.storage_options = storage_options
Expand Down
30 changes: 22 additions & 8 deletions pandas/io/excel/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,46 @@ def register_writer(klass):
_writers[engine_name] = klass


def get_default_engine(ext, mode="read"):
    """
    Return the default reader/writer for the given extension.

    Parameters
    ----------
    ext : str
        The excel file extension for which to get the default engine.
    mode : str, default 'read'
        Whether to get the default engine for reading or writing.
        Either 'read' or 'write'.

    Returns
    -------
    str
        The default engine for the extension.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'read' nor 'write'.
    KeyError
        If ``ext`` has no default engine for the requested mode.
    """
    # Reject unknown modes explicitly; otherwise a typo such as "writer"
    # would silently fall through and return a *reader* engine.
    if mode not in ("read", "write"):
        raise ValueError(f"mode must be 'read' or 'write', got {mode!r}")

    if mode == "read":
        _default_readers = {
            "xlsx": "openpyxl",
            "xlsm": "openpyxl",
            "xlsb": "pyxlsb",
            "xls": "xlrd",
            "ods": "odf",
        }
        return _default_readers[ext]

    _default_writers = {
        "xlsx": "openpyxl",
        "xlsm": "openpyxl",
        "xlsb": "pyxlsb",
        "xls": "xlwt",
        "ods": "odf",
    }
    # xlsxwriter only ever overrides the xlsx default, so probe for it only
    # when it can change the answer. This avoids an unrelated import (and a
    # possible version warning) when resolving engines for other extensions.
    if ext == "xlsx":
        xlsxwriter = import_optional_dependency(
            "xlsxwriter", raise_on_missing=False, on_version="warn"
        )
        if xlsxwriter:
            return "xlsxwriter"
    return _default_writers[ext]


def get_writer(engine_name):
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,8 +1181,6 @@ def test_excel_read_binary(self, engine, read_ext):

def test_excel_read_binary_via_read_excel(self, read_ext, engine):
# GH 38424
if read_ext == ".xlsb" and engine == "pyxlsb":
pytest.xfail("GH 38667 - should default to pyxlsb but doesn't")
with open("test1" + read_ext, "rb") as f:
result = pd.read_excel(f)
expected = pd.read_excel("test1" + read_ext, engine=engine)
Expand Down Expand Up @@ -1229,3 +1227,10 @@ def test_read_datetime_multiindex(self, engine, read_ext):
expected = DataFrame([], columns=expected_column_index)

tm.assert_frame_equal(expected, actual)

def test_engine_invalid_option(self, read_ext):
    """
    Setting an ``io.excel.<ext>.reader`` option to an unknown engine raises.
    """
    # read_ext includes the leading '.', so the f-string below expands to
    # e.g. "io.excel.xlsx.reader".
    option = f"io.excel{read_ext}.reader"
    engine = pd.get_option(option)
    try:
        with pytest.raises(ValueError, match="Value must be one of"):
            pd.set_option(option, "abc")
    finally:
        # Restore unconditionally: if set_option unexpectedly accepted the
        # bogus value, the assertion above fails and the bad setting would
        # otherwise leak into every subsequent test.
        pd.set_option(option, engine)