@@ -215,6 +215,7 @@ tokenizeriter_next(tokenizeriterobject *it)
215
215
216
216
const char * line_start = ISSTRINGLIT (type ) ? it -> tok -> multi_line_start : it -> tok -> line_start ;
217
217
PyObject * line = NULL ;
218
+ int line_changed = 1 ;
218
219
if (it -> tok -> tok_extra_tokens && is_trailing_token ) {
219
220
line = PyUnicode_FromString ("" );
220
221
} else {
@@ -229,12 +230,11 @@ tokenizeriter_next(tokenizeriterobject *it)
229
230
Py_XDECREF (it -> last_line );
230
231
line = PyUnicode_DecodeUTF8 (line_start , size , "replace" );
231
232
it -> last_line = line ;
232
- if (it -> tok -> lineno != it -> last_end_lineno ) {
233
- it -> byte_col_offset_diff = 0 ;
234
- }
233
+ it -> byte_col_offset_diff = 0 ;
235
234
} else {
236
235
// Line hasn't changed so we reuse the cached one.
237
236
line = it -> last_line ;
237
+ line_changed = 0 ;
238
238
}
239
239
}
240
240
if (line == NULL ) {
@@ -252,7 +252,13 @@ tokenizeriter_next(tokenizeriterobject *it)
252
252
Py_ssize_t byte_offset = -1 ;
253
253
if (token .start != NULL && token .start >= line_start ) {
254
254
byte_offset = token .start - line_start ;
255
- col_offset = byte_offset - it -> byte_col_offset_diff ;
255
+ if (line_changed ) {
256
+ col_offset = _PyPegen_byte_offset_to_character_offset_line (line , 0 , byte_offset );
257
+ it -> byte_col_offset_diff = byte_offset - col_offset ;
258
+ }
259
+ else {
260
+ col_offset = byte_offset - it -> byte_col_offset_diff ;
261
+ }
256
262
}
257
263
if (token .end != NULL && token .end >= it -> tok -> line_start ) {
258
264
Py_ssize_t end_byte_offset = token .end - it -> tok -> line_start ;
0 commit comments