fix: Fix parsing of nested tuple field accesses in a cursed way #14084

Merged · 8 commits · Feb 7, 2023
5 changes: 3 additions & 2 deletions crates/hir-def/src/item_tree.rs
@@ -111,7 +111,8 @@ impl ItemTree {
Some(node) => node,
None => return Default::default(),
};
if never!(syntax.kind() == SyntaxKind::ERROR) {
if never!(syntax.kind() == SyntaxKind::ERROR, "{:?} from {:?} {}", file_id, syntax, syntax)
{
// FIXME: not 100% sure why these crop up, but return an empty tree to avoid a panic
return Default::default();
}
@@ -133,7 +134,7 @@ impl ItemTree {
ctx.lower_macro_stmts(stmts)
},
_ => {
panic!("cannot create item tree from {syntax:?} {syntax}");
panic!("cannot create item tree for file {file_id:?} from {syntax:?} {syntax}");
},
}
};
35 changes: 35 additions & 0 deletions crates/hir-def/src/macro_expansion_tests/mbe.rs
@@ -97,6 +97,41 @@ fn#19 main#20(#21)#21 {#22
"##]],
);
}
#[test]
fn float_field_acces_macro_input() {
check(
r#"
macro_rules! foo {
($expr:expr) => {
fn foo() {
$expr;
}
};
}
foo!(x .0.1);
foo!(x .2. 3);
foo!(x .4 .5);
"#,
expect![[r#"
macro_rules! foo {
($expr:expr) => {
fn foo() {
$expr;
}
};
}
fn foo() {
(x.0.1);
}
fn foo() {
(x.2.3);
}
fn foo() {
(x.4.5);
}
"#]],
);
}

#[test]
fn mbe_smoke_test() {
7 changes: 3 additions & 4 deletions crates/hir-def/src/macro_expansion_tests/proc_macros.rs
@@ -104,7 +104,7 @@ macro_rules! id {
$($t)*
};
}
id /*+errors*/! {
id! {
#[proc_macros::identity]
impl Foo for WrapBj {
async fn foo(&self) {
@@ -113,18 +113,17 @@ id /*+errors*/! {
}
}
"#,
expect![[r##"
expect![[r#"
macro_rules! id {
($($t:tt)*) => {
$($t)*
};
}
/* parse error: expected SEMICOLON */
#[proc_macros::identity] impl Foo for WrapBj {
async fn foo(&self ) {
self .0.id().await ;
}
}
"##]],
"#]],
);
}
40 changes: 40 additions & 0 deletions crates/mbe/src/syntax_bridge.rs
@@ -95,6 +95,9 @@ pub fn token_tree_to_syntax_node(
parser::Step::Token { kind, n_input_tokens: n_raw_tokens } => {
tree_sink.token(kind, n_raw_tokens)
}
parser::Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
tree_sink.float_split(has_pseudo_dot)
}
parser::Step::Enter { kind } => tree_sink.start_node(kind),
parser::Step::Exit => tree_sink.finish_node(),
parser::Step::Error { msg } => tree_sink.error(msg.to_string()),
@@ -796,6 +799,43 @@ fn delim_to_str(d: tt::DelimiterKind, closing: bool) -> Option<&'static str> {
}

impl<'a> TtTreeSink<'a> {
/// Parses a float literal as if it were one or two name ref nodes with a dot in between.
/// This occurs when a float literal is used as a field access.
fn float_split(&mut self, has_pseudo_dot: bool) {
let (text, _span) = match self.cursor.token_tree() {
Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Literal(lit), _)) => {
(lit.text.as_str(), lit.span)
}
_ => unreachable!(),
};
match text.split_once('.') {
Some((left, right)) => {
assert!(!left.is_empty());
self.inner.start_node(SyntaxKind::NAME_REF);
self.inner.token(SyntaxKind::INT_NUMBER, left);
self.inner.finish_node();

// here we move the exit up; the original exit has been deleted in `process`
self.inner.finish_node();

self.inner.token(SyntaxKind::DOT, ".");

if has_pseudo_dot {
assert!(right.is_empty(), "{left}.{right}");
} else {
self.inner.start_node(SyntaxKind::NAME_REF);
self.inner.token(SyntaxKind::INT_NUMBER, right);
self.inner.finish_node();

// the parser creates an unbalanced start node, so we are required to close it here
self.inner.finish_node();
}
}
None => unreachable!(),
}
self.cursor = self.cursor.bump();
}

fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
if kind == LIFETIME_IDENT {
n_tokens = 2;
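The doc comment on float_split above describes the split only in terms of syntax nodes. As a rough standalone illustration of the text-level transformation (a minimal sketch with hypothetical names, not rust-analyzer's API), the literal is cut at the dot and each integer part becomes its own field name:

// Editorial sketch, not part of this PR: mirrors the tokens that float_split emits,
// assuming the literal text has the form `<int>.<int>` or `<int>.`.
fn split_float_literal(text: &str, has_pseudo_dot: bool) -> Vec<(&'static str, String)> {
    let (left, right) = text.split_once('.').expect("float literal must contain a dot");
    assert!(!left.is_empty());
    let mut tokens = vec![("INT_NUMBER", left.to_owned()), ("DOT", ".".to_owned())];
    if has_pseudo_dot {
        // e.g. `x.0. await`: the literal is `0.`, so there is no trailing integer part
        assert!(right.is_empty());
    } else {
        // e.g. `x.0.1`: the literal is `0.1`, so the fractional part becomes a second field name
        tokens.push(("INT_NUMBER", right.to_owned()));
    }
    tokens
}

fn main() {
    assert_eq!(
        split_float_literal("0.1", false),
        [("INT_NUMBER", "0".to_owned()), ("DOT", ".".to_owned()), ("INT_NUMBER", "1".to_owned())]
    );
    assert_eq!(split_float_literal("0.", true), [("INT_NUMBER", "0".to_owned()), ("DOT", ".".to_owned())]);
}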
7 changes: 7 additions & 0 deletions crates/mbe/src/to_parser_input.rs
@@ -45,6 +45,13 @@ pub(crate) fn to_parser_input(buffer: &TokenBuffer<'_>) -> parser::Input {
.unwrap_or_else(|| panic!("Fail to convert given literal {:#?}", &lit));

res.push(kind);

if kind == FLOAT_NUMBER && !inner_text.ends_with('.') {
// Tag the token as joint if it is a float with a fractional part;
// we use this jointness to inform the parser about which token-split
// event to emit when it encounters a float literal in a field access
res.was_joint();
}
}
tt::Leaf::Ident(ident) => match ident.text.as_ref() {
"_" => res.push(T![_]),
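Restating the rule from the comment above as a tiny standalone sketch (hypothetical helper, not rust-analyzer code): a float literal coming out of a macro token tree is tagged joint exactly when it still has a fractional part that the parser will need to split off.

// Editorial sketch, not part of this PR.
fn float_is_marked_joint(literal_text: &str) -> bool {
    // `0.1` (from `x.0.1`) has a fractional part and is tagged joint;
    // `0.` (from `x.0. bar`) ends in a dot and is not.
    !literal_text.ends_with('.')
}

fn main() {
    assert!(float_is_marked_joint("0.1"));
    assert!(!float_is_marked_joint("0."));
}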
16 changes: 10 additions & 6 deletions crates/mbe/src/tt_iter.rs
@@ -150,6 +150,11 @@ impl<'a> TtIter<'a> {
cursor = cursor.bump_subtree();
}
}
parser::Step::FloatSplit { .. } => {
// FIXME: We need to split the tree properly here, but mutating the token trees
// in the buffer is somewhat tricky to pull off.
cursor = cursor.bump_subtree();
}
parser::Step::Enter { .. } | parser::Step::Exit => (),
parser::Step::Error { .. } => error = true,
}
@@ -166,19 +171,18 @@

if cursor.is_root() {
while curr != cursor {
if let Some(token) = curr.token_tree() {
res.push(token);
}
let Some(token) = curr.token_tree() else { break };
res.push(token.cloned());
curr = curr.bump();
}
}

self.inner = self.inner.as_slice()[res.len()..].iter();
let res = match res.len() {
1 => Some(res[0].cloned()),
0 => None,
0 | 1 => res.pop(),
_ => Some(tt::TokenTree::Subtree(tt::Subtree {
delimiter: tt::Delimiter::unspecified(),
token_trees: res.into_iter().map(|it| it.cloned()).collect(),
token_trees: res,
})),
};
ExpandResult { value: res, err }
13 changes: 12 additions & 1 deletion crates/parser/src/event.rs
@@ -74,7 +74,13 @@ pub(crate) enum Event {
kind: SyntaxKind,
n_raw_tokens: u8,
},

/// When we parse `foo.0.0` or `foo. 0. 0`, the lexer hands us a single float literal
/// instead of an integer literal followed by a dot, as the lexer has no contextual knowledge.
/// This event instructs whatever consumes the events to split the float literal into
/// the corresponding parts.
FloatSplitHack {
ends_in_dot: bool,
},
Error {
msg: String,
},
@@ -125,6 +131,11 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
Event::Token { kind, n_raw_tokens } => {
res.token(kind, n_raw_tokens);
}
Event::FloatSplitHack { ends_in_dot } => {
res.float_split_hack(ends_in_dot);
let ev = mem::replace(&mut events[i + 1], Event::tombstone());
assert!(matches!(ev, Event::Finish), "{ev:?}");
}
Event::Error { msg } => res.error(msg),
}
}
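The bookkeeping in process above (replacing the following event with a tombstone) can be mirrored with a reduced, purely hypothetical event type: a FloatSplitHack event swallows the Finish that follows it, because the tree sink finishes that node itself while performing the split.

// Editorial sketch, not part of this PR: only the tombstoning logic is mirrored here.
#[derive(Debug, PartialEq)]
enum Ev {
    FloatSplitHack { ends_in_dot: bool },
    Finish,
    Tombstone,
}

fn process(events: &mut Vec<Ev>) {
    for i in 0..events.len() {
        if matches!(events[i], Ev::FloatSplitHack { .. }) {
            // the output sink would be told to perform the split here (elided in this sketch)
            let next = std::mem::replace(&mut events[i + 1], Ev::Tombstone);
            assert!(matches!(next, Ev::Finish), "{next:?}");
        }
    }
}

fn main() {
    let mut events = vec![Ev::FloatSplitHack { ends_in_dot: false }, Ev::Finish];
    process(&mut events);
    assert_eq!(events, [Ev::FloatSplitHack { ends_in_dot: false }, Ev::Tombstone]);
}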
98 changes: 66 additions & 32 deletions crates/parser/src/grammar/expressions.rs
@@ -379,7 +379,7 @@ fn postfix_expr(
// }
T!['('] if allow_calls => call_expr(p, lhs),
T!['['] if allow_calls => index_expr(p, lhs),
T![.] => match postfix_dot_expr(p, lhs) {
T![.] => match postfix_dot_expr::<false>(p, lhs) {
Ok(it) => it,
Err(it) => {
lhs = it;
@@ -393,35 +393,44 @@
block_like = BlockLike::NotBlock;
}
return (lhs, block_like);
}

fn postfix_dot_expr(
p: &mut Parser<'_>,
lhs: CompletedMarker,
) -> Result<CompletedMarker, CompletedMarker> {
fn postfix_dot_expr<const FLOAT_RECOVERY: bool>(
p: &mut Parser<'_>,
lhs: CompletedMarker,
) -> Result<CompletedMarker, CompletedMarker> {
if !FLOAT_RECOVERY {
assert!(p.at(T![.]));
if p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])) {
return Ok(method_call_expr(p, lhs));
}
}
let nth1 = if FLOAT_RECOVERY { 0 } else { 1 };
let nth2 = if FLOAT_RECOVERY { 1 } else { 2 };

// test await_expr
// fn foo() {
// x.await;
// x.0.await;
// x.0().await?.hello();
// }
if p.nth(1) == T![await] {
let m = lhs.precede(p);
p.bump(T![.]);
p.bump(T![await]);
return Ok(m.complete(p, AWAIT_EXPR));
}
if p.nth(nth1) == IDENT && (p.nth(nth2) == T!['('] || p.nth_at(nth2, T![::])) {
return Ok(method_call_expr::<FLOAT_RECOVERY>(p, lhs));
}

if p.at(T![..=]) || p.at(T![..]) {
return Err(lhs);
// test await_expr
// fn foo() {
// x.await;
// x.0.await;
// x.0().await?.hello();
// x.0.0.await;
// x.0. await;
// }
if p.nth(nth1) == T![await] {
let m = lhs.precede(p);
if !FLOAT_RECOVERY {
p.bump(T![.]);
}
p.bump(T![await]);
return Ok(m.complete(p, AWAIT_EXPR));
}

Ok(field_expr(p, lhs))
if p.at(T![..=]) || p.at(T![..]) {
return Err(lhs);
}

field_expr::<FLOAT_RECOVERY>(p, lhs)
}

// test call_expr
@@ -455,11 +464,22 @@ fn index_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
// fn foo() {
// x.foo();
// y.bar::<T>(1, 2,);
// x.0.0.call();
// x.0. call();
// }
fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])));
fn method_call_expr<const FLOAT_RECOVERY: bool>(
p: &mut Parser<'_>,
lhs: CompletedMarker,
) -> CompletedMarker {
if FLOAT_RECOVERY {
assert!(p.nth(0) == IDENT && (p.nth(1) == T!['('] || p.nth_at(1, T![::])));
} else {
assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])));
}
let m = lhs.precede(p);
p.bump_any();
if !FLOAT_RECOVERY {
p.bump(T![.]);
}
name_ref(p);
generic_args::opt_generic_arg_list(p, true);
if p.at(T!['(']) {
@@ -472,21 +492,35 @@ fn method_call_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker
// fn foo() {
// x.foo;
// x.0.bar;
// x.0.1;
// x.0. bar;
// x.0();
// }
fn field_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
assert!(p.at(T![.]));
fn field_expr<const FLOAT_RECOVERY: bool>(
p: &mut Parser<'_>,
lhs: CompletedMarker,
) -> Result<CompletedMarker, CompletedMarker> {
if !FLOAT_RECOVERY {
assert!(p.at(T![.]));
}
let m = lhs.precede(p);
p.bump(T![.]);
if !FLOAT_RECOVERY {
p.bump(T![.]);
}
if p.at(IDENT) || p.at(INT_NUMBER) {
name_ref_or_index(p);
} else if p.at(FLOAT_NUMBER) {
// FIXME: How to recover and instead parse INT + T![.]?
p.bump_any();
return match p.split_float(m) {
(true, m) => {
let lhs = m.complete(p, FIELD_EXPR);
postfix_dot_expr::<true>(p, lhs)
}
(false, m) => Ok(m.complete(p, FIELD_EXPR)),
};
} else {
p.error("expected field name or number");
}
m.complete(p, FIELD_EXPR)
Ok(m.complete(p, FIELD_EXPR))
}

// test try_expr
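The net effect of the FLOAT_RECOVERY path is that x.0.1 ends up as two nested field accesses even though the lexer produced a single 0.1 float token. A standalone sketch of that recovery idea, using hypothetical token and AST types rather than rust-analyzer's parser machinery:

// Editorial sketch, not part of this PR: a toy field-access parser that splits
// a float literal found in field position, mirroring postfix_dot_expr::<true>.
#[derive(Debug, PartialEq)]
enum Expr {
    Name(String),
    Field(Box<Expr>, String),
}

fn parse_fields(mut lhs: Expr, tokens: &[&str]) -> Expr {
    for &tok in tokens {
        match tok {
            "." => {}
            t if t.contains('.') => {
                // a float literal in field position, e.g. `0.1` in `x.0.1`
                let (left, right) = t.split_once('.').unwrap();
                lhs = Expr::Field(Box::new(lhs), left.to_owned());
                if !right.is_empty() {
                    lhs = Expr::Field(Box::new(lhs), right.to_owned());
                }
            }
            name => lhs = Expr::Field(Box::new(lhs), name.to_owned()),
        }
    }
    lhs
}

fn main() {
    // `x.0.1` lexes as [`x`, `.`, `0.1`] but should mean `((x.0).1)`
    let got = parse_fields(Expr::Name("x".into()), &[".", "0.1"]);
    let want = Expr::Field(
        Box::new(Expr::Field(Box::new(Expr::Name("x".into())), "0".into())),
        "1".into(),
    );
    assert_eq!(got, want);
}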
4 changes: 4 additions & 0 deletions crates/parser/src/lib.rs
@@ -102,10 +102,14 @@ impl TopEntryPoint {
match step {
Step::Enter { .. } => depth += 1,
Step::Exit => depth -= 1,
Step::FloatSplit { ends_in_dot: has_pseudo_dot } => {
depth -= 1 + !has_pseudo_dot as usize
}
Step::Token { .. } | Step::Error { .. } => (),
}
}
assert!(!first, "no tree at all");
assert_eq!(depth, 0, "unbalanced tree");
}

res