Skip to content

Fix break_last_token. #130551

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 35 additions & 29 deletions compiler/rustc_ast/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,35 +385,41 @@ impl TokenKind {
Literal(Lit::new(kind, symbol, suffix))
}

/// An approximation to proc-macro-style single-character operators used by rustc parser.
/// If the operator token can be broken into two tokens, the first of which is single-character,
/// then this function performs that operation, otherwise it returns `None`.
pub fn break_two_token_op(&self) -> Option<(TokenKind, TokenKind)> {
Some(match *self {
Le => (Lt, Eq),
EqEq => (Eq, Eq),
Ne => (Not, Eq),
Ge => (Gt, Eq),
AndAnd => (BinOp(And), BinOp(And)),
OrOr => (BinOp(Or), BinOp(Or)),
BinOp(Shl) => (Lt, Lt),
BinOp(Shr) => (Gt, Gt),
BinOpEq(Plus) => (BinOp(Plus), Eq),
BinOpEq(Minus) => (BinOp(Minus), Eq),
BinOpEq(Star) => (BinOp(Star), Eq),
BinOpEq(Slash) => (BinOp(Slash), Eq),
BinOpEq(Percent) => (BinOp(Percent), Eq),
BinOpEq(Caret) => (BinOp(Caret), Eq),
BinOpEq(And) => (BinOp(And), Eq),
BinOpEq(Or) => (BinOp(Or), Eq),
BinOpEq(Shl) => (Lt, Le),
BinOpEq(Shr) => (Gt, Ge),
DotDot => (Dot, Dot),
DotDotDot => (Dot, DotDot),
PathSep => (Colon, Colon),
RArrow => (BinOp(Minus), Gt),
LArrow => (Lt, BinOp(Minus)),
FatArrow => (Eq, Gt),
/// An approximation to proc-macro-style single-character operators used by
/// rustc parser. If the operator token can be broken into two tokens, the
/// first of which has `n` (1 or 2) chars, then this function performs that
/// operation, otherwise it returns `None`.
///
/// The returned pair is `(first, rest)`: `first` is the leading `n`-char
/// token and `rest` is whatever remains of the original token.
///
/// # Panics
///
/// Panics if `n` is neither 1 nor 2.
pub fn break_two_token_op(&self, n: u32) -> Option<(TokenKind, TokenKind)> {
assert!(n == 1 || n == 2);
Some(match (self, n) {
(Le, 1) => (Lt, Eq),
(EqEq, 1) => (Eq, Eq),
(Ne, 1) => (Not, Eq),
(Ge, 1) => (Gt, Eq),
(AndAnd, 1) => (BinOp(And), BinOp(And)),
(OrOr, 1) => (BinOp(Or), BinOp(Or)),
(BinOp(Shl), 1) => (Lt, Lt),
(BinOp(Shr), 1) => (Gt, Gt),
(BinOpEq(Plus), 1) => (BinOp(Plus), Eq),
(BinOpEq(Minus), 1) => (BinOp(Minus), Eq),
(BinOpEq(Star), 1) => (BinOp(Star), Eq),
(BinOpEq(Slash), 1) => (BinOp(Slash), Eq),
(BinOpEq(Percent), 1) => (BinOp(Percent), Eq),
(BinOpEq(Caret), 1) => (BinOp(Caret), Eq),
(BinOpEq(And), 1) => (BinOp(And), Eq),
(BinOpEq(Or), 1) => (BinOp(Or), Eq),
// Three-character tokens can be split after the first or the second
// character, so they get one arm per supported value of `n`.
(BinOpEq(Shl), 1) => (Lt, Le), // `<` + `<=`
(BinOpEq(Shl), 2) => (BinOp(Shl), Eq), // `<<` + `=`
(BinOpEq(Shr), 1) => (Gt, Ge), // `>` + `>=`
(BinOpEq(Shr), 2) => (BinOp(Shr), Eq), // `>>` + `=`
(DotDot, 1) => (Dot, Dot),
(DotDotDot, 1) => (Dot, DotDot), // `.` + `..`
(DotDotDot, 2) => (DotDot, Dot), // `..` + `.`
// Note: `..=` only has an `n == 2` split here; `(DotDotEq, 1)` falls
// through to `None`.
(DotDotEq, 2) => (DotDot, Eq), // `..` + `=`
(PathSep, 1) => (Colon, Colon),
(RArrow, 1) => (BinOp(Minus), Gt),
(LArrow, 1) => (Lt, BinOp(Minus)),
(FatArrow, 1) => (Eq, Gt),
// Every other (token, n) combination cannot be broken this way.
_ => return None,
})
}
Expand Down
36 changes: 19 additions & 17 deletions compiler/rustc_parse/src/parser/attr_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ struct LazyAttrTokenStreamImpl {
start_token: (Token, Spacing),
cursor_snapshot: TokenCursor,
num_calls: u32,
break_last_token: bool,
break_last_token: u32,
node_replacements: Box<[NodeReplacement]>,
}

Expand Down Expand Up @@ -339,17 +339,20 @@ impl<'a> Parser<'a> {
let parser_replacements_end = self.capture_state.parser_replacements.len();

assert!(
!(self.break_last_token && matches!(capture_trailing, Trailing::Yes)),
"Cannot set break_last_token and have trailing token"
!(self.break_last_token > 0 && matches!(capture_trailing, Trailing::Yes)),
"Cannot have break_last_token > 0 and have trailing token"
);
assert!(self.break_last_token <= 2, "cannot break token more than twice");

let end_pos = self.num_bump_calls
+ capture_trailing as u32
// If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens), then
// extend the range of captured tokens to include it, since the parser was not actually
// bumped past it. When the `LazyAttrTokenStream` gets converted into an
// `AttrTokenStream`, we will create the proper token.
+ self.break_last_token as u32;
// If we "broke" the last token (e.g. breaking a `>>` token once into `>` + `>`, or
// breaking a `>>=` token twice into `>` + `>` + `=`), then extend the range of
// captured tokens to include it, because the parser was not actually bumped past it.
// (Even if we broke twice, it was still just one token originally, hence the `1`.)
// When the `LazyAttrTokenStream` gets converted into an `AttrTokenStream`, we will
// rebreak that final token once or twice.
+ if self.break_last_token == 0 { 0 } else { 1 };

let num_calls = end_pos - collect_pos.start_pos;

Expand Down Expand Up @@ -425,7 +428,7 @@ impl<'a> Parser<'a> {
// for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
// eager cfg-expansion on the captured token stream.
if definite_capture_mode {
assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
assert!(self.break_last_token == 0, "Should not have unglued last token with cfg attr");

// What is the status here when parsing the example code at the top of this method?
//
Expand Down Expand Up @@ -471,7 +474,7 @@ impl<'a> Parser<'a> {
/// close delims.
fn make_attr_token_stream(
iter: impl Iterator<Item = FlatToken>,
break_last_token: bool,
break_last_token: u32,
) -> AttrTokenStream {
#[derive(Debug)]
struct FrameData {
Expand Down Expand Up @@ -513,18 +516,17 @@ fn make_attr_token_stream(
}
}

if break_last_token {
if break_last_token > 0 {
let last_token = stack_top.inner.pop().unwrap();
if let AttrTokenTree::Token(last_token, spacing) = last_token {
let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap();

// An 'unglued' token is always two ASCII characters
// Tokens are always ASCII chars, so we can use byte arithmetic here.
let mut first_span = last_token.span.shrink_to_lo();
first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
first_span =
first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token));

stack_top
.inner
.push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing));
} else {
panic!("Unexpected last token {last_token:?}")
}
Expand Down
39 changes: 21 additions & 18 deletions compiler/rustc_parse/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,21 +146,25 @@ pub struct Parser<'a> {
token_cursor: TokenCursor,
// The number of calls to `bump`, i.e. the position in the token stream.
num_bump_calls: u32,
// During parsing we may sometimes need to 'unglue' a glued token into two
// component tokens (e.g. '>>' into '>' and '>'), so the parser can consume
// them one at a time. This process bypasses the normal capturing mechanism
// (e.g. `num_bump_calls` will not be incremented), since the 'unglued'
// tokens do not exist in the original `TokenStream`.
// During parsing we may sometimes need to "unglue" a glued token into two
// or three component tokens (e.g. `>>` into `>` and `>`, or `>>=` into `>`
// and `>` and `=`), so the parser can consume them one at a time. This
// process bypasses the normal capturing mechanism (e.g. `num_bump_calls`
// will not be incremented), since the "unglued" tokens do not exist in
// the original `TokenStream`.
//
// If we end up consuming both unglued tokens, this is not an issue. We'll
// end up capturing the single 'glued' token.
// If we end up consuming all the component tokens, this is not an issue,
// because we'll end up capturing the single "glued" token.
//
// However, sometimes we may want to capture just the first 'unglued'
// However, sometimes we may want to capture not all of the original
// token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>`
// requires us to unglue the trailing `>>` token. The `break_last_token`
// field is used to track this token. It gets appended to the captured
// field is used to track these tokens. They get appended to the captured
// stream when we evaluate a `LazyAttrTokenStream`.
break_last_token: bool,
//
// This value is always 0, 1, or 2. It can only reach 2 when splitting
// `>>=` or `<<=`.
break_last_token: u32,
/// This field is used to keep track of how many left angle brackets we have seen. This is
/// required in order to detect extra leading left angle brackets (`<` characters) and error
/// appropriately.
Expand Down Expand Up @@ -453,7 +457,7 @@ impl<'a> Parser<'a> {
expected_tokens: Vec::new(),
token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() },
num_bump_calls: 0,
break_last_token: false,
break_last_token: 0,
unmatched_angle_bracket_count: 0,
angle_bracket_nesting: 0,
last_unexpected_token_span: None,
Expand Down Expand Up @@ -773,7 +777,7 @@ impl<'a> Parser<'a> {
self.bump();
return true;
}
match self.token.kind.break_two_token_op() {
match self.token.kind.break_two_token_op(1) {
Some((first, second)) if first == expected => {
let first_span = self.psess.source_map().start_point(self.token.span);
let second_span = self.token.span.with_lo(first_span.hi());
Expand All @@ -783,8 +787,8 @@ impl<'a> Parser<'a> {
//
// If we consume any additional tokens, then this token
// is not needed (we'll capture the entire 'glued' token),
// and `bump` will set this field to `None`
self.break_last_token = true;
// and `bump` will set this field to 0.
self.break_last_token += 1;
// Use the spacing of the glued token as the spacing of the
// unglued second token.
self.bump_with((Token::new(second, second_span), self.token_spacing));
Expand Down Expand Up @@ -1148,10 +1152,9 @@ impl<'a> Parser<'a> {
// than `.0`/`.1` access.
let mut next = self.token_cursor.inlined_next();
self.num_bump_calls += 1;
// We've retrieved a token from the underlying
// cursor, so we no longer need to worry about
// an unglued token. See `break_and_eat` for more details
self.break_last_token = false;
// We got a token from the underlying cursor and no longer need to
// worry about an unglued token. See `break_and_eat` for more details.
self.break_last_token = 0;
if next.0.span.is_dummy() {
// Tweak the location for better diagnostics, but keep syntactic context intact.
let fallback_span = self.token.span;
Expand Down
16 changes: 16 additions & 0 deletions tests/ui/macros/break-last-token-twice.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//@ check-pass

// Accepts `static NAME: TYPE = EXPR` and expands it to a `let` binding.
// The type is matched through a `$t:ty` fragment, so whatever token ends the
// type is directly followed by the `=` of the input pattern.
macro_rules! m {
(static $name:ident: $t:ty = $e:expr) => {
let $name: $t = $e;
}
}

// Invokes `m!` with a nested generic type written with no space before the
// `=`, so that the type ends in the middle of a `>>=` token.
fn main() {
m! {
// Tricky: the trailing `>>=` token here is broken twice:
// - into `>` and `>=`
// - then the `>=` is broken into `>` and `=`
// Do not insert whitespace between `>>` and `=`: the glued spelling is
// what this test exercises.
static _x: Vec<Vec<u32>>= vec![]
}
}
Loading