Skip to content

Commit 081738d

Browse files
committed
Refactor and simplify the traceback code
1 parent 26430d4 commit 081738d

File tree

1 file changed

+100
-69
lines changed

1 file changed

+100
-69
lines changed

Python/traceback.c

+100-69
Original file line numberDiff line numberDiff line change
@@ -536,9 +536,10 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent, i
536536
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\f'))
537537

538538
static int
539-
extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_anchor, int *right_anchor)
539+
extract_anchors_from_expr(const char *segment_str, expr_ty expr, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
540+
char** primary_error_char, char** secondary_error_char)
540541
{
541-
switch (expr->kind) {
542+
switch (expr->kind) {
542543
case BinOp_kind: {
543544
expr_ty left = expr->v.BinOp.left;
544545
expr_ty right = expr->v.BinOp.right;
@@ -554,13 +555,21 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho
554555
if (i + 1 < right->col_offset && !IS_WHITESPACE(segment_str[i + 1])) {
555556
++*right_anchor;
556557
}
558+
559+
// Set the error characters
560+
*primary_error_char = "~";
561+
*secondary_error_char = "^";
557562
break;
558563
}
559564
return 1;
560565
}
561566
case Subscript_kind: {
562567
*left_anchor = expr->v.Subscript.value->end_col_offset;
563568
*right_anchor = expr->v.Subscript.slice->end_col_offset + 1;
569+
570+
// Set the error characters
571+
*primary_error_char = "~";
572+
*secondary_error_char = "^";
564573
return 1;
565574
}
566575
default:
@@ -569,11 +578,13 @@ extract_anchors_from_expr(const char *segment_str, expr_ty expr, int *left_ancho
569578
}
570579

571580
static int
572-
extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_anchor, int *right_anchor)
581+
extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
582+
char** primary_error_char, char** secondary_error_char)
573583
{
574584
switch (statement->kind) {
575585
case Expr_kind: {
576-
return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor);
586+
return extract_anchors_from_expr(segment_str, statement->v.Expr.value, left_anchor, right_anchor,
587+
primary_error_char, secondary_error_char);
577588
}
578589
default:
579590
return 0;
@@ -583,7 +594,8 @@ extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, int *left_
583594
static int
584595
extract_anchors_from_line(PyObject *filename, PyObject *line,
585596
Py_ssize_t start_offset, Py_ssize_t end_offset,
586-
int *left_anchor, int *right_anchor)
597+
Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
598+
char** primary_error_char, char** secondary_error_char)
587599
{
588600
int res = -1;
589601
PyArena *arena = NULL;
@@ -620,12 +632,17 @@ extract_anchors_from_line(PyObject *filename, PyObject *line,
620632
assert(module->kind == Module_kind);
621633
if (asdl_seq_LEN(module->v.Module.body) == 1) {
622634
stmt_ty statement = asdl_seq_GET(module->v.Module.body, 0);
623-
res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor);
635+
res = extract_anchors_from_stmt(segment_str, statement, left_anchor, right_anchor,
636+
primary_error_char, secondary_error_char);
624637
} else {
625638
res = 0;
626639
}
627640

628641
done:
642+
if (res > 0) {
643+
*left_anchor += start_offset;
644+
*right_anchor += start_offset;
645+
}
629646
Py_XDECREF(segment);
630647
if (arena) {
631648
_PyArena_Free(arena);
@@ -646,6 +663,25 @@ ignore_source_errors(void) {
646663
return 0;
647664
}
648665

666+
static inline int
667+
print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset,
668+
Py_ssize_t right_start_offset, Py_ssize_t left_end_offset,
669+
const char *primary, const char *secondary) {
670+
int err = 0;
671+
int special_chars = (left_end_offset != -1 || right_start_offset != -1);
672+
while (++offset <= end_offset) {
673+
if (offset <= start_offset || offset > end_offset) {
674+
err = PyFile_WriteString(" ", f);
675+
} else if (special_chars && left_end_offset < offset && offset <= right_start_offset) {
676+
err = PyFile_WriteString(secondary, f);
677+
} else {
678+
err = PyFile_WriteString(primary, f);
679+
}
680+
}
681+
err = PyFile_WriteString("\n", f);
682+
return err;
683+
}
684+
649685
static int
650686
tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int lineno,
651687
PyFrameObject *frame, PyObject *name)
@@ -665,76 +701,71 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
665701
return err;
666702
int truncation = _TRACEBACK_SOURCE_LINE_INDENT;
667703
PyObject* source_line = NULL;
668-
/* ignore errors since we can't report them, can we? */
669-
if (!_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
670-
&truncation, &source_line)) {
671-
int code_offset = tb->tb_lasti;
672-
PyCodeObject* code = _PyFrame_GetCode(frame);
673-
674-
int start_line;
675-
int end_line;
676-
int start_col_byte_offset;
677-
int end_col_byte_offset;
678-
if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
679-
&end_line, &end_col_byte_offset)) {
680-
goto done;
681-
}
682-
if (start_line != end_line) {
683-
goto done;
684-
}
685704

686-
if (start_col_byte_offset < 0 || end_col_byte_offset < 0) {
687-
goto done;
688-
}
689-
690-
// Convert the utf-8 byte offset to the actual character offset so we
691-
// print the right number of carets.
692-
Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset;
693-
Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset;
694-
695-
if (source_line) {
696-
start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
697-
end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
698-
}
705+
if (_Py_DisplaySourceLine(f, filename, lineno, _TRACEBACK_SOURCE_LINE_INDENT,
706+
&truncation, &source_line) != 0) {
707+
/* ignore errors since we can't report them, can we? */
708+
err = ignore_source_errors();
709+
goto done;
710+
}
699711

700-
const char *primary, *secondary;
701-
primary = secondary = "^";
712+
int code_offset = tb->tb_lasti;
713+
PyCodeObject* code = _PyFrame_GetCode(frame);
702714

703-
int left_end_offset = Py_SAFE_DOWNCAST(end_offset, Py_ssize_t, int) - Py_SAFE_DOWNCAST(start_offset, Py_ssize_t, int);
704-
int right_start_offset = left_end_offset;
715+
int start_line;
716+
int end_line;
717+
int start_col_byte_offset;
718+
int end_col_byte_offset;
719+
if (!PyCode_Addr2Location(code, code_offset, &start_line, &start_col_byte_offset,
720+
&end_line, &end_col_byte_offset)) {
721+
goto done;
722+
}
723+
if (start_line != end_line) {
724+
goto done;
725+
}
705726

706-
if (source_line) {
707-
int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
708-
&left_end_offset, &right_start_offset);
709-
if (res < 0) {
710-
err = ignore_source_errors();
711-
if (err < 0) {
712-
goto done;
713-
}
714-
} else if (res > 0) {
715-
primary = "^";
716-
secondary = "~";
717-
}
718-
}
727+
if (start_col_byte_offset < 0 || end_col_byte_offset < 0) {
728+
goto done;
729+
}
719730

720-
char offset = truncation;
721-
while (++offset <= end_offset) {
722-
if (offset <= start_offset) {
723-
err = PyFile_WriteString(" ", f);
724-
} else if (offset <= left_end_offset + start_offset) {
725-
err = PyFile_WriteString(secondary, f);
726-
} else if (offset <= right_start_offset + start_offset) {
727-
err = PyFile_WriteString(primary, f);
728-
} else {
729-
err = PyFile_WriteString(secondary, f);
730-
}
731+
// When displaying errors, we will use the following generic structure:
732+
//
733+
// ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE ERROR LINE
734+
// ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^~~~~~~~~~~~~~~~~~~~
735+
// | |-> left_end_offset | |-> end_offset
736+
// |-> start_offset |-> right_start_offset
737+
//
738+
// In general we will only have (start_offset, end_offset) but we can gather more information
739+
// by analyzing the AST of the text between *start_offset* and *end_offset*. If this succeeds
740+
// we could get *left_end_offset* and *right_start_offset* and some selection of characters for
741+
// the different ranges (primary_error_char and secondary_error_char). If we cannot obtain the
742+
// AST information or we cannot identify special ranges within it, then left_end_offset and
743+
// right_start_offset will be set to -1.
744+
745+
// Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
746+
Py_ssize_t start_offset = (Py_ssize_t)start_col_byte_offset;
747+
Py_ssize_t end_offset = (Py_ssize_t)end_col_byte_offset;
748+
Py_ssize_t left_end_offset = -1;
749+
Py_ssize_t right_start_offset = -1;
750+
751+
char *primary_error_char = "^";
752+
char *secondary_error_char = primary_error_char;
753+
754+
if (source_line) {
755+
start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
756+
end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
757+
int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
758+
&left_end_offset, &right_start_offset,
759+
&primary_error_char, &secondary_error_char);
760+
if (res < 0 && ignore_source_errors() < 0) {
761+
goto done;
731762
}
732-
err = PyFile_WriteString("\n", f);
733763
}
734-
else {
735-
err = ignore_source_errors();
736-
}
737-
764+
765+
err = print_error_location_carets(f, truncation, start_offset, end_offset,
766+
right_start_offset, left_end_offset,
767+
primary_error_char, secondary_error_char);
768+
738769
done:
739770
Py_XDECREF(source_line);
740771
return err;

0 commit comments

Comments
 (0)