def get_effective_cell(sheet, rowx, colx):
    """
    Return the cell that supplies the content for position (rowx, colx).

    When the cell at the requested position carries no content of its own
    and lies inside one of the sheet's merged ranges, the top-left cell of
    that range is returned instead. In every other case the cell at the
    requested position is returned unchanged.

    Parameters
    ----------
    sheet : xlrd sheet object
        Must provide ``cell(rowx, colx)`` and a ``merged_cells`` sequence of
        ``(rlo, rhi, clo, chi)`` ranges.
    rowx : int
        Zero-based row index of the requested cell.
    colx : int
        Zero-based column index of the requested cell.

    Returns
    -------
    cell
        The object returned by ``sheet.cell`` for either the requested
        position or the top-left corner of the enclosing merged range.

    Raises
    ------
    ImportError
        If xlrd is not installed.
    """
    import xlrd  # throw an ImportError if we need to

    cell_at_loc = sheet.cell(rowx, colx)

    # A cell holding its own content is always its own effective cell.
    # Both "no content" cell types are accepted here so that a covered cell
    # resolves to its anchor whichever of the two the reader reports.
    if cell_at_loc.ctype not in (xlrd.XL_CELL_BLANK, xlrd.XL_CELL_EMPTY):
        return cell_at_loc

    # NOTE(review): xlrd documents that merged ranges are only extracted for
    # .xls files opened with formatting_info=True -- confirm for the xlrd
    # version in use. With no merged ranges the loop body never runs.
    for rlo, rhi, clo, chi in sheet.merged_cells:
        # Upper bounds are treated as exclusive.
        if rlo <= rowx < rhi and clo <= colx < chi:
            return sheet.cell(rlo, clo)

    # Content-free cell that is not part of any merged range.
    return cell_at_loc
def test_read_effective_cells(self):
    """
    Check get_effective_cell against the merged_effective.xls fixture.

    The assertions below expect: row 0 to hold plain values, a merged
    range covering rows 1-2 / columns 1-2 whose top-left cell holds "b",
    and content-free cells outside that range to be returned as-is.
    """
    _skip_if_no_xlrd()
    import xlrd

    pth = os.path.join(self.dirpath, 'merged_effective.xls')
    # NOTE(review): formatting_info is passed as an extra keyword; this
    # test relies on ExcelFile handing it through to xlrd so that merged
    # ranges are available on the sheet.
    xls = ExcelFile(pth, formatting_info=True)
    sheet = xls.book.sheet_by_index(0)

    # Either cell type means "this cell has no content of its own".
    no_content = (xlrd.XL_CELL_BLANK, xlrd.XL_CELL_EMPTY)

    def check_value(rowx, colx, expected):
        cell = get_effective_cell(sheet, rowx, colx)
        self.assertEqual(cell.value, expected)

    def check_no_content(rowx, colx):
        cell = get_effective_cell(sheet, rowx, colx)
        # assertTrue instead of the deprecated assert_ alias
        self.assertTrue(cell.ctype in no_content)

    check_value(0, 0, 1)
    check_value(0, 4, 5)

    check_value(1, 0, "a")
    check_value(1, 1, "b")  # top left of merged range
    check_value(1, 2, "b")  # merged
    check_value(1, 3, "c")
    check_value(1, 4, "d")

    check_no_content(2, 0)
    check_value(2, 1, "b")  # merged
    check_value(2, 2, "b")  # merged
    check_no_content(2, 3)
    check_no_content(2, 4)

    check_value(3, 0, 1)
    check_no_content(3, 1)
    check_no_content(3, 2)
    check_value(3, 3, 4)
    check_value(3, 4, 5)