Skip to content

Commit 6ed3a9d

Browse files
committed
bpo-43950: support positions for dis.Instructions created through dis.Bytecode (pythonGH-28142)
1 parent f235dd0 commit 6ed3a9d

File tree

3 files changed

+81
-13
lines changed

3 files changed

+81
-13
lines changed

Parser/pegen.c

+54-3
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,54 @@ get_error_line(Parser *p, Py_ssize_t lineno)
410410
}
411411

412412
Py_ssize_t
413-
_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
413+
byte_offset_difference_for_unicode(Py_ssize_t size, PyObject *text)
{
    /* Count how many of the first `size` items of `text` are classified by
     * unicodedata.east_asian_width() as wide ("W") or fullwidth ("F").
     *
     * Returns 0 right away when `text` is pure ASCII, the count otherwise,
     * and -1 if importing unicodedata, looking up east_asian_width, fetching
     * an item, or calling east_asian_width fails.
     *
     * Every owned reference is released on every path: the callable is
     * dropped once after the loop (it used to leak on success), and each
     * per-iteration object is dropped exactly once (the item used to be
     * decref'ed twice when the call failed). */
    if (PyUnicode_IS_ASCII(text)) {
        return 0;
    }

    PyObject *unicode_data = PyImport_ImportModuleNoBlock("unicodedata");
    if (!unicode_data) {
        return -1;
    }

    PyObject *east_asian_width = PyObject_GetAttrString(unicode_data, "east_asian_width");
    Py_DECREF(unicode_data);
    if (!east_asian_width) {
        return -1;
    }

    Py_ssize_t difference = 0;
    for (Py_ssize_t i = 0; i < size; i++) {
        PyObject *current = PySequence_GetItem(text, i);
        if (!current) {
            difference = -1;
            break;
        }

        PyObject *width = PyObject_CallOneArg(east_asian_width, current);
        /* The item is only needed as the call argument; release it now so
         * no later path has to remember it. */
        Py_DECREF(current);
        if (!width) {
            difference = -1;
            break;
        }

        if (_PyUnicode_EqualToASCIIString(width, "W") ||
            _PyUnicode_EqualToASCIIString(width, "F")) {
            ++difference;
        }
        Py_DECREF(width);
    }

    /* Single exit point shared by the success and failure paths. */
    Py_DECREF(east_asian_width);
    return difference;
}
458+
459+
Py_ssize_t
460+
_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset, Py_ssize_t *difference)
414461
{
415462
const char *str = PyUnicode_AsUTF8(line);
416463
if (!str) {
@@ -425,7 +472,10 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
425472
if (!text) {
426473
return -1;
427474
}
475+
428476
Py_ssize_t size = PyUnicode_GET_LENGTH(text);
477+
*difference = byte_offset_difference_for_unicode(size, text);
478+
429479
Py_DECREF(text);
430480
return size;
431481
}
@@ -508,14 +558,15 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
508558

509559
Py_ssize_t col_number = col_offset;
510560
Py_ssize_t end_col_number = end_col_offset;
561+
Py_ssize_t start_difference, end_difference;
511562

512563
if (p->tok->encoding != NULL) {
513-
col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
564+
col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset, &start_difference);
514565
if (col_number < 0) {
515566
goto error;
516567
}
517568
if (end_col_number > 0) {
518-
Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
569+
Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number, &end_difference);
519570
if (end_col_offset < 0) {
520571
goto error;
521572
} else {

Parser/pegen.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ expr_ty _PyPegen_name_token(Parser *p);
140140
expr_ty _PyPegen_number_token(Parser *p);
141141
void *_PyPegen_string_token(Parser *p);
142142
const char *_PyPegen_get_expr_name(expr_ty);
143-
Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
143+
Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset, Py_ssize_t *difference);
144144
void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
145145
void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
146146
Py_ssize_t lineno, Py_ssize_t col_offset,

Python/traceback.c

+26-9
Original file line numberDiff line numberDiff line change
@@ -594,8 +594,9 @@ extract_anchors_from_stmt(const char *segment_str, stmt_ty statement, Py_ssize_t
594594
static int
595595
extract_anchors_from_line(PyObject *filename, PyObject *line,
596596
Py_ssize_t start_offset, Py_ssize_t end_offset,
597-
Py_ssize_t *left_anchor, Py_ssize_t *right_anchor,
598-
char** primary_error_char, char** secondary_error_char)
597+
Py_ssize_t start_difference, Py_ssize_t *left_anchor,
598+
Py_ssize_t *right_anchor, char** primary_error_char,
599+
char** secondary_error_char)
599600
{
600601
int res = -1;
601602
PyArena *arena = NULL;
@@ -638,11 +639,21 @@ extract_anchors_from_line(PyObject *filename, PyObject *line,
638639
res = 0;
639640
}
640641

641-
done:
642642
if (res > 0) {
643-
*left_anchor += start_offset;
644-
*right_anchor += start_offset;
643+
Py_ssize_t left_difference, right_difference;
644+
645+
*left_anchor = _PyPegen_byte_offset_to_character_offset(segment, *left_anchor, &left_difference);
646+
*right_anchor = _PyPegen_byte_offset_to_character_offset(segment, *right_anchor, &right_difference);
647+
648+
if (*left_anchor < 0 || *right_anchor < 0) {
649+
res = -1;
650+
goto done;
651+
}
652+
653+
*left_anchor += start_offset + start_difference + left_difference;
654+
*right_anchor += start_offset + start_difference + right_difference;
645655
}
656+
done:
646657
Py_XDECREF(segment);
647658
if (arena) {
648659
_PyArena_Free(arena);
@@ -665,7 +676,7 @@ ignore_source_errors(void) {
665676

666677
static inline int
667678
print_error_location_carets(PyObject *f, int offset, Py_ssize_t start_offset, Py_ssize_t end_offset,
668-
Py_ssize_t right_start_offset, Py_ssize_t left_end_offset,
679+
Py_ssize_t left_end_offset, Py_ssize_t right_start_offset,
669680
const char *primary, const char *secondary) {
670681
int err = 0;
671682
int special_chars = (left_end_offset != -1 || right_start_offset != -1);
@@ -744,13 +755,15 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
744755

745756
// Convert the utf-8 byte offset to the actual character offset so we print the right number of carets.
746757
assert(source_line);
747-
Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset);
758+
759+
Py_ssize_t start_difference, end_difference;
760+
Py_ssize_t start_offset = _PyPegen_byte_offset_to_character_offset(source_line, start_col_byte_offset, &start_difference);
748761
if (start_offset < 0) {
749762
err = ignore_source_errors() < 0;
750763
goto done;
751764
}
752765

753-
Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset);
766+
Py_ssize_t end_offset = _PyPegen_byte_offset_to_character_offset(source_line, end_col_byte_offset, &end_difference);
754767
if (end_offset < 0) {
755768
err = ignore_source_errors() < 0;
756769
goto done;
@@ -764,6 +777,7 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
764777

765778
if (start_line == end_line) {
766779
int res = extract_anchors_from_line(filename, source_line, start_offset, end_offset,
780+
start_difference,
767781
&left_end_offset, &right_start_offset,
768782
&primary_error_char, &secondary_error_char);
769783
if (res < 0 && ignore_source_errors() < 0) {
@@ -788,8 +802,11 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
788802
end_offset = i + 1;
789803
}
790804

805+
start_offset += start_difference;
806+
end_offset += end_difference;
807+
791808
err = print_error_location_carets(f, truncation, start_offset, end_offset,
792-
right_start_offset, left_end_offset,
809+
left_end_offset, right_start_offset,
793810
primary_error_char, secondary_error_char);
794811

795812
done:

0 commit comments

Comments
 (0)