Skip to content

Commit 00e47ae

Browse files
authored
Merge pull request #48 from lysnikolaou/fstring-middle-before-end
Emit FSTRING_MIDDLE for literal after last expression and before quotes
2 parents 081f5a2 + 0026415 commit 00e47ae

File tree

5 files changed

+45
-54
lines changed

5 files changed

+45
-54
lines changed

Include/internal/pycore_token.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,7 @@ extern "C" {
9191
(x) == INDENT || \
9292
(x) == DEDENT)
9393
#define ISSTRINGLIT(x) ((x) == STRING || \
94-
(x) == FSTRING_MIDDLE || \
95-
(x) == FSTRING_END)
94+
(x) == FSTRING_MIDDLE)
9695

9796

9897
// Symbols exported for test_peg_generator

Lib/test/test_tokenize.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -1938,7 +1938,8 @@ def test_string(self):
19381938

19391939
self.check_tokenize('f"abc"', """\
19401940
FSTRING_START 'f"' (1, 0) (1, 2)
1941-
FSTRING_END 'abc' (1, 2) (1, 5)
1941+
FSTRING_MIDDLE 'abc' (1, 2) (1, 5)
1942+
FSTRING_END '"' (1, 5) (1, 6)
19421943
""")
19431944

19441945
self.check_tokenize('fR"a{b}c"', """\
@@ -1947,24 +1948,28 @@ def test_string(self):
19471948
LBRACE '{' (1, 4) (1, 5)
19481949
NAME 'b' (1, 5) (1, 6)
19491950
RBRACE '}' (1, 6) (1, 7)
1950-
FSTRING_END 'c' (1, 7) (1, 8)
1951+
FSTRING_MIDDLE 'c' (1, 7) (1, 8)
1952+
FSTRING_END '"' (1, 8) (1, 9)
19511953
""")
19521954

19531955
self.check_tokenize('f"""abc"""', """\
19541956
FSTRING_START 'f\"""' (1, 0) (1, 4)
1955-
FSTRING_END 'abc' (1, 4) (1, 7)
1957+
FSTRING_MIDDLE 'abc' (1, 4) (1, 7)
1958+
FSTRING_END '\"""' (1, 7) (1, 10)
19561959
""")
19571960

19581961
self.check_tokenize(r'f"abc\
19591962
def"', """\
19601963
FSTRING_START \'f"\' (1, 0) (1, 2)
1961-
FSTRING_END 'abc\\\\\\ndef' (1, 2) (2, 3)
1964+
FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 2) (2, 3)
1965+
FSTRING_END '"' (2, 3) (2, 4)
19621966
""")
19631967

19641968
self.check_tokenize(r'Rf"abc\
19651969
def"', """\
19661970
FSTRING_START 'Rf"' (1, 0) (1, 3)
1967-
FSTRING_END 'abc\\\\\\ndef' (1, 3) (2, 3)
1971+
FSTRING_MIDDLE 'abc\\\\\\ndef' (1, 3) (2, 3)
1972+
FSTRING_END '"' (2, 3) (2, 4)
19681973
""")
19691974

19701975
def test_function(self):

Parser/action_helpers.c

+1-33
Original file line numberDiff line numberDiff line change
@@ -1388,15 +1388,9 @@ deal_with_gstring2(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b)
13881388
if (quote_str == NULL) {
13891389
return NULL;
13901390
}
1391-
13921391
int is_raw = strpbrk(quote_str, "rR") != NULL;
1393-
const char* _str = PyBytes_AsString(b->bytes);
1394-
if (_str == NULL) {
1395-
return NULL;
1396-
}
1397-
int is_b_empty = strlen(_str) == 0;
13981392

1399-
asdl_expr_seq *seq = _Py_asdl_expr_seq_new(is_b_empty ? n_items : n_items + 1, p->arena);
1393+
asdl_expr_seq *seq = _Py_asdl_expr_seq_new(n_items, p->arena);
14001394
if (seq == NULL) {
14011395
return NULL;
14021396
}
@@ -1412,32 +1406,6 @@ deal_with_gstring2(Parser *p, Token* a, asdl_expr_seq* raw_expressions, Token*b)
14121406
asdl_seq_SET(seq, i, item);
14131407
}
14141408

1415-
if (!is_b_empty) {
1416-
PyObject* str = PyUnicode_FromString(_str);
1417-
if (str == NULL) {
1418-
return NULL;
1419-
}
1420-
if (_PyArena_AddPyObject(p->arena, str) < 0) {
1421-
Py_DECREF(str);
1422-
return NULL;
1423-
}
1424-
1425-
expr_ty the_str = _PyAST_Constant(str, NULL, b->lineno,
1426-
b->col_offset, b->end_lineno,
1427-
b->end_col_offset, p->arena);
1428-
if (the_str == NULL) {
1429-
return NULL;
1430-
}
1431-
1432-
expr_ty decoded_str = _PyPegen_decode_fstring_part(
1433-
p, is_raw, the_str
1434-
);
1435-
if (decoded_str == NULL) {
1436-
return NULL;
1437-
}
1438-
1439-
asdl_seq_SET(seq, n_items, decoded_str);
1440-
}
14411409
return _PyAST_JoinedStr(seq, a->lineno, a->col_offset,
14421410
b->end_lineno, b->end_col_offset,
14431411
p->arena);

Parser/tokenizer.c

+32-12
Original file line numberDiff line numberDiff line change
@@ -2434,18 +2434,41 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
24342434
// If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
24352435
// before it.
24362436
char start_char = tok_nextc(tok);
2437-
char peek = tok_nextc(tok);
2438-
tok_backup(tok, peek);
2437+
char peek1 = tok_nextc(tok);
2438+
char peek2 = tok_nextc(tok);
2439+
tok_backup(tok, peek2);
2440+
tok_backup(tok, peek1);
24392441
tok_backup(tok, start_char);
24402442

2441-
if ((start_char == '{' && peek != '{') || (start_char == '}' && peek != '}')) {
2443+
if ((start_char == '{' && peek1 != '{') || (start_char == '}' && peek1 != '}')) {
24422444
if (start_char == '{') {
24432445
current_tok->bracket_mark[++current_tok->bracket_mark_index] = current_tok->bracket_stack;
24442446
}
24452447
tok->tok_mode_stack[tok->tok_mode_stack_index].kind = TOK_REGULAR_MODE;
24462448
return tok_get_normal_mode(tok, current_tok, token);
24472449
}
24482450

2451+
// Emit FSTRING_END in case we've reached the end of the string
2452+
if (start_char == current_tok->f_string_quote
2453+
&& (current_tok->f_string_quote_size != 3 || (start_char == peek1 && start_char == peek2))) {
2454+
// Advance the tokenizer state again to create a token out of the end quotes
2455+
for (int i = 0; i < current_tok->f_string_quote_size; i++) {
2456+
tok_nextc(tok);
2457+
}
2458+
2459+
if (current_tok->last_expr_buffer != NULL) {
2460+
PyMem_Free(current_tok->last_expr_buffer);
2461+
current_tok->last_expr_buffer = NULL;
2462+
current_tok->last_expr_size = 0;
2463+
current_tok->last_expr_end = -1;
2464+
}
2465+
2466+
p_start = tok->start;
2467+
p_end = tok->cur;
2468+
tok->tok_mode_stack_index--;
2469+
return MAKE_TOKEN(FSTRING_END);
2470+
}
2471+
24492472
int end_quote_size = 0;
24502473
int unicode_escape = 0;
24512474
while (end_quote_size != current_tok->f_string_quote_size) {
@@ -2549,17 +2572,14 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
25492572
}
25502573
}
25512574

2552-
if (current_tok->last_expr_buffer != NULL) {
2553-
PyMem_Free(current_tok->last_expr_buffer);
2554-
current_tok->last_expr_buffer = NULL;
2555-
current_tok->last_expr_size = 0;
2556-
current_tok->last_expr_end = -1;
2575+
// Back up over the f-string quotes to emit a final FSTRING_MIDDLE and
2576+
// add the quotes to the FSTRING_END in the next tokenizer iteration.
2577+
for (int i = 0; i < current_tok->f_string_quote_size; i++) {
2578+
tok_backup(tok, current_tok->f_string_quote);
25572579
}
2558-
25592580
p_start = tok->start;
2560-
p_end = tok->cur - current_tok->f_string_quote_size;
2561-
tok->tok_mode_stack_index--;
2562-
return MAKE_TOKEN(FSTRING_END);
2581+
p_end = tok->cur;
2582+
return MAKE_TOKEN(FSTRING_MIDDLE);
25632583
}
25642584

25652585

Tools/build/generate_token.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,7 @@ def update_file(file, content):
8181
(x) == INDENT || \\
8282
(x) == DEDENT)
8383
#define ISSTRINGLIT(x) ((x) == STRING || \\
84-
(x) == FSTRING_MIDDLE || \\
85-
(x) == FSTRING_END)
84+
(x) == FSTRING_MIDDLE)
8685
8786
8887
// Symbols exported for test_peg_generator

0 commit comments

Comments
 (0)