From a22c9fe396c7254bfe0ee798aaa9a0238d45291e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 29 May 2018 15:13:23 -0700 Subject: [PATCH 1/8] Added tests --- pandas/tests/io/test_excel.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 05423474f330a..2a339f697615a 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2006,6 +2006,24 @@ def test_write_cells_merge_styled(self, merge_cells, ext, engine): assert xcell_b1.font == openpyxl_sty_merged assert xcell_a2.font == openpyxl_sty_merged + @pytest.mark.parametrize("mode,sheet_count", [('w', 1), ('a', 3)]) + def test_write_append_mode(self, merge_cells, ext, engine, mode, + sheet_count): + df = DataFrame(np.random.randn(3, 10)) + + with ensure_clean(ext) as infile, ensure_clean(ext) as outfile: + wb = openpyxl.Workbook() + wb.create_sheet('foo') + wb.create_sheet('bar') + wb.save(infile) + + writer = pd.ExcelWriter(outfile, engine=engine, mode=mode) + df.to_excel(writer, sheet_name='baz') + writer.save() + + wb2 = openpyxl.load_workbook(outfile) + assert len(wb2.worksheets) == sheet_count + @td.skip_if_no('xlwt') @pytest.mark.parametrize("merge_cells,ext,engine", [ @@ -2060,6 +2078,13 @@ def test_to_excel_styleconverter(self, merge_cells, ext, engine): assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert + def test_write_append_mode_raises(self, merge_cells, ext, engine): + msg = "Append mode is not supported with xlwt!" 
+ + with ensure_clean(ext) as f: + with tm.assert_raises_regexp("ValueError", msg): + pd.ExcelWriter(f, engine=engine, mode='a') + @td.skip_if_no('xlsxwriter') @pytest.mark.parametrize("merge_cells,ext,engine", [ @@ -2111,6 +2136,13 @@ def test_column_format(self, merge_cells, ext, engine): assert read_num_format == num_format + def test_write_append_mode_raises(self, merge_cells, ext, engine): + msg = "Append mode is not supported with xlsxwriter!" + + with ensure_clean(ext) as f: + with tm.assert_raises_regexp("ValueError", msg): + pd.ExcelWriter(f, engine=engine, mode='a') + class TestExcelWriterEngineTests(object): From bfe915500d74b9e628690ba5c96ff058745ee02b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 29 May 2018 16:01:53 -0700 Subject: [PATCH 2/8] Initial working code --- pandas/io/excel.py | 50 ++++++++++++++++++++++++----------- pandas/tests/io/test_excel.py | 19 ++++++------- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 5608c29637447..04105212c800d 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -804,6 +804,8 @@ class ExcelWriter(object): datetime_format : string, default None Format string for datetime objects written into Excel files (e.g. 'YYYY-MM-DD HH:MM:SS') + mode : {'w' or 'a'}, default 'w' + File mode to use (write or append). 
Notes ----- @@ -897,7 +899,8 @@ def save(self): pass def __init__(self, path, engine=None, - date_format=None, datetime_format=None, **engine_kwargs): + date_format=None, datetime_format=None, mode='w', + **engine_kwargs): # validate that this engine can handle the extension if isinstance(path, string_types): ext = os.path.splitext(path)[-1] @@ -919,6 +922,8 @@ def __init__(self, path, engine=None, else: self.datetime_format = datetime_format + self.mode = mode + def __fspath__(self): return _stringify_path(self.path) @@ -993,23 +998,28 @@ class _OpenpyxlWriter(ExcelWriter): engine = 'openpyxl' supported_extensions = ('.xlsx', '.xlsm') - def __init__(self, path, engine=None, **engine_kwargs): + def __init__(self, path, engine=None, mode='w', **engine_kwargs): # Use the openpyxl module as the Excel writer. from openpyxl.workbook import Workbook - super(_OpenpyxlWriter, self).__init__(path, **engine_kwargs) + super(_OpenpyxlWriter, self).__init__(path, mode=mode, **engine_kwargs) - # Create workbook object with default optimized_write=True. - self.book = Workbook() + if self.mode == 'a': # Load from existing workbook + from openpyxl import load_workbook + book = load_workbook(self.path) + self.book = book + else: + # Create workbook object with default optimized_write=True. + self.book = Workbook() - # Openpyxl 1.6.1 adds a dummy sheet. We remove it. - if self.book.worksheets: - try: - self.book.remove(self.book.worksheets[0]) - except AttributeError: + # Openpyxl 1.6.1 adds a dummy sheet. We remove it. 
+ if self.book.worksheets: + try: + self.book.remove(self.book.worksheets[0]) + except AttributeError: - # compat - self.book.remove_sheet(self.book.worksheets[0]) + # compat + self.book.remove_sheet(self.book.worksheets[0]) def save(self): """ @@ -1443,11 +1453,16 @@ class _XlwtWriter(ExcelWriter): engine = 'xlwt' supported_extensions = ('.xls',) - def __init__(self, path, engine=None, encoding=None, **engine_kwargs): + def __init__(self, path, engine=None, encoding=None, mode='w', + **engine_kwargs): # Use the xlwt module as the Excel writer. import xlwt engine_kwargs['engine'] = engine - super(_XlwtWriter, self).__init__(path, **engine_kwargs) + + if mode == 'a': + raise ValueError('Append mode is not supported with xlwt!') + + super(_XlwtWriter, self).__init__(path, mode=mode, **engine_kwargs) if encoding is None: encoding = 'ascii' @@ -1713,13 +1728,18 @@ class _XlsxWriter(ExcelWriter): supported_extensions = ('.xlsx',) def __init__(self, path, engine=None, - date_format=None, datetime_format=None, **engine_kwargs): + date_format=None, datetime_format=None, mode='w', + **engine_kwargs): # Use the xlsxwriter module as the Excel writer. 
import xlsxwriter + if mode == 'a': + raise ValueError('Append mode is not supported with xlsxwriter!') + super(_XlsxWriter, self).__init__(path, engine=engine, date_format=date_format, datetime_format=datetime_format, + mode=mode, **engine_kwargs) self.book = xlsxwriter.Workbook(path, **engine_kwargs) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 2a339f697615a..c78b950597f44 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2009,19 +2009,20 @@ def test_write_cells_merge_styled(self, merge_cells, ext, engine): @pytest.mark.parametrize("mode,sheet_count", [('w', 1), ('a', 3)]) def test_write_append_mode(self, merge_cells, ext, engine, mode, sheet_count): + import openpyxl df = DataFrame(np.random.randn(3, 10)) - with ensure_clean(ext) as infile, ensure_clean(ext) as outfile: + with ensure_clean(ext) as f: wb = openpyxl.Workbook() - wb.create_sheet('foo') + wb.worksheets[0].title = 'foo' wb.create_sheet('bar') - wb.save(infile) + wb.save(f) - writer = pd.ExcelWriter(outfile, engine=engine, mode=mode) + writer = ExcelWriter(f, engine=engine, mode=mode) df.to_excel(writer, sheet_name='baz') writer.save() - wb2 = openpyxl.load_workbook(outfile) + wb2 = openpyxl.load_workbook(f) assert len(wb2.worksheets) == sheet_count @@ -2082,8 +2083,8 @@ def test_write_append_mode_raises(self, merge_cells, ext, engine): msg = "Append mode is not supported with xlwt!" with ensure_clean(ext) as f: - with tm.assert_raises_regexp("ValueError", msg): - pd.ExcelWriter(f, engine=engine, mode='a') + with tm.assert_raises_regex(ValueError, msg): + ExcelWriter(f, engine=engine, mode='a') @td.skip_if_no('xlsxwriter') @@ -2140,8 +2141,8 @@ def test_write_append_mode_raises(self, merge_cells, ext, engine): msg = "Append mode is not supported with xlsxwriter!" 
with ensure_clean(ext) as f: - with tm.assert_raises_regexp("ValueError", msg): - pd.ExcelWriter(f, engine=engine, mode='a') + with tm.assert_raises_regex(ValueError, msg): + ExcelWriter(f, engine=engine, mode='a') class TestExcelWriterEngineTests(object): From b516f895951327e20748b887056a667ff97e5001 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 29 May 2018 16:33:15 -0700 Subject: [PATCH 3/8] Improved test coverage --- pandas/tests/io/test_excel.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index c78b950597f44..0c0594f23f40b 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2006,24 +2006,31 @@ def test_write_cells_merge_styled(self, merge_cells, ext, engine): assert xcell_b1.font == openpyxl_sty_merged assert xcell_a2.font == openpyxl_sty_merged - @pytest.mark.parametrize("mode,sheet_count", [('w', 1), ('a', 3)]) - def test_write_append_mode(self, merge_cells, ext, engine, mode, - sheet_count): + @pytest.mark.parametrize("mode,expected", [ + ('w', ['baz']), ('a', ['foo', 'bar', 'baz'])]) + def test_write_append_mode(self, merge_cells, ext, engine, mode, expected): import openpyxl - df = DataFrame(np.random.randn(3, 10)) + df = DataFrame([1], columns=['baz']) with ensure_clean(ext) as f: wb = openpyxl.Workbook() wb.worksheets[0].title = 'foo' + wb.worksheets[0]['A1'].value = 'foo' wb.create_sheet('bar') + wb.worksheets[1]['A1'].value = 'bar' wb.save(f) + wb.close() writer = ExcelWriter(f, engine=engine, mode=mode) - df.to_excel(writer, sheet_name='baz') + df.to_excel(writer, sheet_name='baz', index=False) writer.save() wb2 = openpyxl.load_workbook(f) - assert len(wb2.worksheets) == sheet_count + result = [wb.title for wb in wb2.worksheets] + assert result == expected + + for index, cell_value in enumerate(expected): + assert wb2.worksheets[index]['A1'].value == cell_value @td.skip_if_no('xlwt') From 
09e5b456e1af5cde55f18f903ab90c761643b05a Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 29 May 2018 16:38:18 -0700 Subject: [PATCH 4/8] Updated whatsnew --- doc/source/whatsnew/v0.23.1.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 35484e34ee9eb..2b2ea5c7dad44 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -16,6 +16,7 @@ New features ~~~~~~~~~~~~ - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) +- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`) .. _whatsnew_0231.deprecations: From 8acadb336fe27fd1ecde305b3cfb8b094af3448b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 29 May 2018 17:11:57 -0700 Subject: [PATCH 5/8] Removed errant close call --- pandas/tests/io/test_excel.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 0c0594f23f40b..1fe84d73756e9 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2019,7 +2019,6 @@ def test_write_append_mode(self, merge_cells, ext, engine, mode, expected): wb.create_sheet('bar') wb.worksheets[1]['A1'].value = 'bar' wb.save(f) - wb.close() writer = ExcelWriter(f, engine=engine, mode=mode) df.to_excel(writer, sheet_name='baz', index=False) From 64b0a6304e4b1db915b779d27f321a6b3edaff93 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 May 2018 11:29:09 -0700 Subject: [PATCH 6/8] Updated documentation --- doc/source/whatsnew/v0.23.1.txt | 2 -- doc/source/whatsnew/v0.24.0.txt | 2 ++ pandas/io/excel.py | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 2b2ea5c7dad44..12c78bbb39172 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ 
b/doc/source/whatsnew/v0.23.1.txt @@ -16,8 +16,6 @@ New features ~~~~~~~~~~~~ - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) -- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`) - .. _whatsnew_0231.deprecations: diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e931450cb5c01..6997ea84e5b83 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -8,6 +8,8 @@ v0.24.0 New features ~~~~~~~~~~~~ +- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`) + .. _whatsnew_0240.enhancements.other: Other Enhancements diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 04105212c800d..e86d33742b266 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -807,6 +807,8 @@ class ExcelWriter(object): mode : {'w' or 'a'}, default 'w' File mode to use (write or append). + .. versionadded:: 0.24.0 + Notes ----- For compatibility with CSV writers, ExcelWriter serializes lists @@ -1012,13 +1014,12 @@ def __init__(self, path, engine=None, mode='w', **engine_kwargs): # Create workbook object with default optimized_write=True. self.book = Workbook() - # Openpyxl 1.6.1 adds a dummy sheet. We remove it. 
if self.book.worksheets: try: self.book.remove(self.book.worksheets[0]) except AttributeError: - # compat + # compat - for openpyxl <= 2.4 self.book.remove_sheet(self.book.worksheets[0]) def save(self): From e6242e6478ed04d8268bbab4fc229eb5084e9391 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 30 May 2018 11:32:23 -0700 Subject: [PATCH 7/8] Reverted changes to v0.23.1 whatsnew --- doc/source/whatsnew/v0.23.1.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 12c78bbb39172..35484e34ee9eb 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -17,6 +17,7 @@ New features - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with MultiIndex (:issue:`21115`) + .. _whatsnew_0231.deprecations: Deprecations From e0c5b69bb70442295db06528a3719e4b532afbc0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 1 Jun 2018 10:23:54 -0700 Subject: [PATCH 8/8] Compat LINT fixup --- pandas/tests/io/test_excel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 1fe84d73756e9..2a225e6fe6a45 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -2025,7 +2025,7 @@ def test_write_append_mode(self, merge_cells, ext, engine, mode, expected): writer.save() wb2 = openpyxl.load_workbook(f) - result = [wb.title for wb in wb2.worksheets] + result = [sheet.title for sheet in wb2.worksheets] assert result == expected for index, cell_value in enumerate(expected):