diff --git a/pymupdf4llm/pymupdf4llm/helpers/__pycache__/get_text_lines.cpython-312-pytest-8.3.4.pyc b/pymupdf4llm/pymupdf4llm/helpers/__pycache__/get_text_lines.cpython-312-pytest-8.3.4.pyc new file mode 100644 index 00000000..9aa2b4c3 Binary files /dev/null and b/pymupdf4llm/pymupdf4llm/helpers/__pycache__/get_text_lines.cpython-312-pytest-8.3.4.pyc differ diff --git a/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py b/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py index 8af6a1e8..171ad7f7 100644 --- a/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py +++ b/pymupdf4llm/pymupdf4llm/helpers/get_text_lines.py @@ -110,10 +110,11 @@ def sanitize_spans(line): continue if s["flags"] & 1 == 1: # if a superscript, modify bbox # with that of the preceding or following span - i = 1 if sno == 0 else sno - 1 - neighbor = line["spans"][i] - sbbox.y1 = neighbor["bbox"][3] - s["text"] = f"[{s['text']}]" + if len(line["spans"]) > 1: + i = 1 if sno == 0 else sno - 1 + neighbor = line["spans"][i] + sbbox.y1 = neighbor["bbox"][3] + s["text"] = f"[{s['text']}]" s["bbox"] = sbbox # update with the Rect version # include line/block numbers to facilitate separator insertion s["line"] = lno diff --git a/tests/pymupdf4llm/llama_index/__pycache__/test_pdf_markdown_reader.cpython-312-pytest-8.3.4.pyc b/tests/pymupdf4llm/llama_index/__pycache__/test_pdf_markdown_reader.cpython-312-pytest-8.3.4.pyc new file mode 100644 index 00000000..d731ca5c Binary files /dev/null and b/tests/pymupdf4llm/llama_index/__pycache__/test_pdf_markdown_reader.cpython-312-pytest-8.3.4.pyc differ