Move doc comment desugaring out of TokenCursor. #114273

Merged
merged 8 commits on Aug 1, 2023
101 changes: 89 additions & 12 deletions compiler/rustc_ast/src/tokenstream.rs
@@ -13,7 +13,7 @@
//! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
//! ownership of the original.

use crate::ast::StmtKind;
use crate::ast::{AttrStyle, StmtKind};
use crate::ast_traits::{HasAttrs, HasSpan, HasTokens};
use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind};
use crate::AttrVec;
@@ -22,11 +22,11 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::sync::{self, Lrc};
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use rustc_span::{Span, DUMMY_SP};
use rustc_span::{sym, Span, Symbol, DUMMY_SP};
use smallvec::{smallvec, SmallVec};

use std::borrow::Cow;
use std::{fmt, iter, mem};
use std::{cmp, fmt, iter, mem};

/// When the main Rust parser encounters a syntax-extension invocation, it
/// parses the arguments to the invocation as a token tree. This is a very
@@ -566,6 +566,92 @@ impl TokenStream {
pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
self.0.chunks(chunk_size)
}

/// Desugar doc comments like `/// foo` in the stream into `#[doc =
/// r"foo"]`. Modifies the `TokenStream` via `Lrc::make_mut`, but as little
/// as possible.
pub fn desugar_doc_comments(&mut self) {
if let Some(desugared_stream) = desugar_inner(self.clone()) {
*self = desugared_stream;
}

// The return value is `None` if nothing in `stream` changed.
fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
let mut i = 0;
let mut modified = false;
while let Some(tt) = stream.0.get(i) {
match tt {
&TokenTree::Token(
Token { kind: token::DocComment(_, attr_style, data), span },
_spacing,
) => {
let desugared = desugared_tts(attr_style, data, span);
let desugared_len = desugared.len();
Lrc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
modified = true;
i += desugared_len;
}

&TokenTree::Token(..) => i += 1,

&TokenTree::Delimited(sp, delim, ref delim_stream) => {
if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
let new_tt = TokenTree::Delimited(sp, delim, desugared_delim_stream);
Lrc::make_mut(&mut stream.0)[i] = new_tt;
modified = true;
}
i += 1;
}
}
}
if modified { Some(stream) } else { None }
}

fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
// required to wrap the text. E.g.
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
// - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
// - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
let mut num_of_hashes = 0;
let mut count = 0;
for ch in data.as_str().chars() {
count = match ch {
'"' => 1,
'#' if count > 0 => count + 1,
_ => 0,
};
num_of_hashes = cmp::max(num_of_hashes, count);
}

// `/// foo` becomes `doc = r"foo"`.
let delim_span = DelimSpan::from_single(span);
let body = TokenTree::Delimited(
delim_span,
Delimiter::Bracket,
[
TokenTree::token_alone(token::Ident(sym::doc, false), span),
TokenTree::token_alone(token::Eq, span),
TokenTree::token_alone(
TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
span,
),
]
.into_iter()
.collect::<TokenStream>(),
);

if attr_style == AttrStyle::Inner {
vec![
TokenTree::token_alone(token::Pound, span),
TokenTree::token_alone(token::Not, span),
body,
]
} else {
vec![TokenTree::token_alone(token::Pound, span), body]
}
}
}
}

/// By-reference iterator over a [`TokenStream`], that produces `&TokenTree`
@@ -628,15 +714,6 @@ impl TokenTreeCursor {
pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
self.stream.0.get(self.index + n)
}

// Replace the previously obtained token tree with `tts`, and rewind to
// just before them.
pub fn replace_prev_and_rewind(&mut self, tts: Vec<TokenTree>) {
assert!(self.index > 0);
self.index -= 1;
let stream = Lrc::make_mut(&mut self.stream.0);
stream.splice(self.index..self.index + 1, tts);
}
}

#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
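As an aside, the hash-counting loop in the new `desugared_tts` is easy to verify in isolation: it scans the text for a `"` followed by a run of `#`s and keeps the longest run, which is the minimum number of `#`s the raw-string delimiters need. A minimal standalone sketch of the same logic (the `required_hashes` helper and the `main` harness are illustrative, not part of this PR):

```rust
// Minimum number of `#`s needed to wrap `data` in a raw string literal.
// Mirrors the loop in `desugared_tts`: a `"` starts a candidate run,
// each following `#` extends it, anything else resets it.
fn required_hashes(data: &str) -> usize {
    let mut num_of_hashes = 0;
    let mut count = 0;
    for ch in data.chars() {
        count = match ch {
            '"' => 1,
            '#' if count > 0 => count + 1,
            _ => 0,
        };
        num_of_hashes = num_of_hashes.max(count);
    }
    num_of_hashes
}

fn main() {
    assert_eq!(required_hashes("abc d"), 0); // wrapped as r"abc d"
    assert_eq!(required_hashes(r#"abc "d""#), 1); // wrapped as r#"abc "d""#
    assert_eq!(required_hashes(r####"abc "##d##""####), 3); // wrapped as r###"abc "##d##""###
}
```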
28 changes: 16 additions & 12 deletions compiler/rustc_expand/src/mbe/macro_rules.rs
@@ -249,15 +249,15 @@ fn expand_macro<'cx>(
trace_macros_note(&mut cx.expansions, sp, msg);
}

let p = Parser::new(sess, tts, false, None);
let p = Parser::new(sess, tts, None);

if is_local {
cx.resolver.record_macro_rule_usage(node_id, i);
}

// Let the context choose how to interpret the result.
// Weird, but useful for X-macros.
return Box::new(ParserAnyMacro {
Box::new(ParserAnyMacro {
parser: p,

// Pass along the original expansion site and the name of the macro
@@ -269,18 +269,17 @@
is_trailing_mac: cx.current_expansion.is_trailing_mac,
arm_span,
is_local,
});
})
}
Err(CanRetry::No(_)) => {
debug!("Will not retry matching as an error was emitted already");
return DummyResult::any(sp);
DummyResult::any(sp)
}
Err(CanRetry::Yes) => {
// Retry and emit a better error below.
// Retry and emit a better error.
diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
}
}

diagnostics::failed_to_match_macro(cx, sp, def_span, name, arg, lhses)
}

pub(super) enum CanRetry {
@@ -447,7 +446,7 @@ pub fn compile_declarative_macro(

let create_parser = || {
let body = macro_def.body.tokens.clone();
Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS)
Parser::new(&sess.parse_sess, body, rustc_parse::MACRO_ARGUMENTS)
};

let parser = create_parser();
@@ -457,8 +456,8 @@
match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
Success(m) => m,
Failure(()) => {
// The fast `NoopTracker` doesn't have any info on failure, so we need to retry it with another one
// that gives us the information we need.
// The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
// with another one that gives us the information we need.
// For this we need to reclone the macro body as the previous parser consumed it.
let retry_parser = create_parser();

@@ -1418,6 +1417,11 @@ fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String {
}
}

pub(super) fn parser_from_cx(sess: &ParseSess, tts: TokenStream, recovery: Recovery) -> Parser<'_> {
Parser::new(sess, tts, true, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
pub(super) fn parser_from_cx(
sess: &ParseSess,
mut tts: TokenStream,
recovery: Recovery,
) -> Parser<'_> {
tts.desugar_doc_comments();
Parser::new(sess, tts, rustc_parse::MACRO_ARGUMENTS).recovery(recovery)
}
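For context on why `parser_from_cx` now desugars eagerly: as the comment removed from `TokenCursor` notes, some declarative macros match on the attribute form of a doc comment, so the tokens handed to `parse_tt` must already be in `#[doc = ...]` shape. A small illustrative example (hypothetical, not from this PR) of a `$attr:meta` fragment matching a doc comment:

```rust
// The matcher expects an attribute, yet a doc comment works too, because
// `/// text` reaches the macro machinery already desugared to
// `#[doc = r" text"]`.
macro_rules! first_attr {
    (#[$attr:meta] $item:item) => {
        stringify!($attr)
    };
}

fn main() {
    let attr = first_attr! {
        /// Adds two numbers.
        fn add(a: i32, b: i32) -> i32 { a + b }
    };
    // Prints the desugared attribute, e.g. `doc = " Adds two numbers."`.
    println!("{attr}");
}
```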
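The `NoopTracker` retry in `compile_declarative_macro` above follows a two-pass pattern: match once with a zero-cost tracker, and only on failure re-run with an instrumented one to build diagnostics. A simplified sketch of that pattern (all types and signatures here are hypothetical stand-ins for the rustc internals):

```rust
// Two-pass matching: the cheap pass records nothing; the expensive pass
// is only run after a failure, to gather material for a good error.
trait Tracker {
    fn note(&mut self, msg: &str);
}

struct NoopTracker;
impl Tracker for NoopTracker {
    // Zero cost on the happy path.
    fn note(&mut self, _msg: &str) {}
}

struct RecordingTracker(Vec<String>);
impl Tracker for RecordingTracker {
    fn note(&mut self, msg: &str) {
        self.0.push(msg.to_string());
    }
}

fn try_match(input: &str, tracker: &mut impl Tracker) -> bool {
    tracker.note("attempting rule 1");
    input.starts_with("ok")
}

fn main() {
    let input = "nope";
    if !try_match(input, &mut NoopTracker) {
        // Fast path failed: retry with tracking to explain why.
        let mut tracker = RecordingTracker(Vec::new());
        try_match(input, &mut tracker);
        eprintln!("match failed; notes: {:?}", tracker.0);
    }
}
```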
4 changes: 2 additions & 2 deletions compiler/rustc_parse/src/lib.rs
@@ -205,7 +205,7 @@ pub fn stream_to_parser<'a>(
stream: TokenStream,
subparser_name: Option<&'static str>,
) -> Parser<'a> {
Parser::new(sess, stream, false, subparser_name)
Parser::new(sess, stream, subparser_name)
}

/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
@@ -215,7 +215,7 @@ pub fn parse_in<'a, T>(
name: &'static str,
mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
) -> PResult<'a, T> {
let mut parser = Parser::new(sess, tts, false, Some(name));
let mut parser = Parser::new(sess, tts, Some(name));
let result = f(&mut parser)?;
if parser.token != token::Eof {
parser.unexpected()?;
91 changes: 13 additions & 78 deletions compiler/rustc_parse/src/parser/mod.rs
@@ -24,7 +24,7 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
use rustc_ast::util::case::Case;
use rustc_ast::AttrId;
use rustc_ast::DUMMY_NODE_ID;
use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
use rustc_ast_pretty::pprust;
@@ -38,7 +38,7 @@ use rustc_session::parse::ParseSess;
use rustc_span::source_map::{Span, DUMMY_SP};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use std::ops::Range;
use std::{cmp, mem, slice};
use std::{mem, slice};
use thin_vec::ThinVec;
use tracing::debug;

@@ -224,11 +224,6 @@ struct TokenCursor {
// because it's the outermost token stream which never has delimiters.
stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,

// We need to desugar doc comments from `/// foo` form into `#[doc =
// r"foo"]` form when parsing declarative macro inputs in `parse_tt`,
// because some declarative macros look for `doc` attributes.
desugar_doc_comments: bool,

// Counts the number of calls to `{,inlined_}next`.
num_next_calls: usize,

@@ -265,29 +260,17 @@ impl TokenCursor {
#[inline(always)]
fn inlined_next(&mut self) -> (Token, Spacing) {
loop {
// FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will
// need to, whereupon the `delim != Delimiter::Invisible` conditions below can be
// removed.
// FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
// #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
// below can be removed.
if let Some(tree) = self.tree_cursor.next_ref() {
match tree {
&TokenTree::Token(ref token, spacing) => {
match (self.desugar_doc_comments, token) {
(
true,
&Token { kind: token::DocComment(_, attr_style, data), span },
) => {
let desugared = self.desugar(attr_style, data, span);
self.tree_cursor.replace_prev_and_rewind(desugared);
// Continue to get the first token of the desugared doc comment.
}
_ => {
debug_assert!(!matches!(
token.kind,
token::OpenDelim(_) | token::CloseDelim(_)
));
return (token.clone(), spacing);
}
}
debug_assert!(!matches!(
token.kind,
token::OpenDelim(_) | token::CloseDelim(_)
));
return (token.clone(), spacing);
}
&TokenTree::Delimited(sp, delim, ref tts) => {
let trees = tts.clone().into_trees();
@@ -311,52 +294,6 @@
}
}
}

// Desugar a doc comment into something like `#[doc = r"foo"]`.
fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
// Searches for the occurrences of `"#*` and returns the minimum number of `#`s
// required to wrap the text. E.g.
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
// - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
// - `abc "##d##"` is wrapped as `r###"abc "##d##""###` (num_of_hashes = 3)
let mut num_of_hashes = 0;
let mut count = 0;
for ch in data.as_str().chars() {
count = match ch {
'"' => 1,
'#' if count > 0 => count + 1,
_ => 0,
};
num_of_hashes = cmp::max(num_of_hashes, count);
}

// `/// foo` becomes `doc = r"foo"`.
let delim_span = DelimSpan::from_single(span);
let body = TokenTree::Delimited(
delim_span,
Delimiter::Bracket,
[
TokenTree::token_alone(token::Ident(sym::doc, false), span),
TokenTree::token_alone(token::Eq, span),
TokenTree::token_alone(
TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
span,
),
]
.into_iter()
.collect::<TokenStream>(),
);

if attr_style == AttrStyle::Inner {
vec![
TokenTree::token_alone(token::Pound, span),
TokenTree::token_alone(token::Not, span),
body,
]
} else {
vec![TokenTree::token_alone(token::Pound, span), body]
}
}
}

#[derive(Debug, Clone, PartialEq)]
@@ -451,8 +388,7 @@ pub(super) fn token_descr(token: &Token) -> String {
impl<'a> Parser<'a> {
pub fn new(
sess: &'a ParseSess,
tokens: TokenStream,
desugar_doc_comments: bool,
stream: TokenStream,
subparser_name: Option<&'static str>,
) -> Self {
let mut parser = Parser {
@@ -464,10 +400,9 @@ impl<'a> Parser<'a> {
restrictions: Restrictions::empty(),
expected_tokens: Vec::new(),
token_cursor: TokenCursor {
tree_cursor: tokens.into_trees(),
tree_cursor: stream.into_trees(),
stack: Vec::new(),
num_next_calls: 0,
desugar_doc_comments,
break_last_token: false,
},
unmatched_angle_bracket_count: 0,
@@ -1172,7 +1107,7 @@ impl<'a> Parser<'a> {
}
i += 1;
}
return looker(&token);
looker(&token)
}

/// Returns whether any of the given keywords are `dist` tokens ahead of the current one.
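Finally, the two token shapes emitted by the moved desugaring, written out as equivalent surface syntax (a sketch for reference, not part of the diff): outer doc comments become `#` plus the bracketed `doc = ...` body, while inner ones insert a `!` between them.

```rust
/// outer docs on an item
// desugars to the tokens: `#` `[doc = r" outer docs on an item"]`
struct S;

mod m {
    //! inner docs on the enclosing module
    // desugars to the tokens: `#` `!` `[doc = r" inner docs on the enclosing module"]`
}

fn main() {}
```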