Skip to content

Commit 238d907

Browse files
committed Apr 6, 2022
Call compute_locs once per rule.
Currently it's called in `parse_tt` every time a match rule is invoked. This commit moves it so it's called instead once per match rule, in `compile_declarative_macro`. This is a performance win. The commit also moves `compute_locs` out of `TtParser`, because there's no longer any reason for it to be in there.
·
1.88.0 … 1.62.0
1 parent 7300bd6 commit 238d907

File tree

2 files changed

+123
-114
lines changed

2 files changed

+123
-114
lines changed
 

‎compiler/rustc_expand/src/mbe/macro_parser.rs

Lines changed: 89 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
106106
///
107107
/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
108108
/// simply incrementing the current matcher position index by one.
109-
enum MatcherLoc {
109+
pub(super) enum MatcherLoc {
110110
Token {
111111
token: Token,
112112
},
@@ -138,6 +138,78 @@ enum MatcherLoc {
138138
Eof,
139139
}
140140

141+
pub(super) fn compute_locs(sess: &ParseSess, matcher: &[TokenTree]) -> Vec<MatcherLoc> {
142+
fn inner(
143+
sess: &ParseSess,
144+
tts: &[TokenTree],
145+
locs: &mut Vec<MatcherLoc>,
146+
next_metavar: &mut usize,
147+
seq_depth: usize,
148+
) {
149+
for tt in tts {
150+
match tt {
151+
TokenTree::Token(token) => {
152+
locs.push(MatcherLoc::Token { token: token.clone() });
153+
}
154+
TokenTree::Delimited(_, delimited) => {
155+
locs.push(MatcherLoc::Delimited);
156+
inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth);
157+
}
158+
TokenTree::Sequence(_, seq) => {
159+
// We can't determine `idx_first_after` and construct the final
160+
// `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
161+
// pieces are processed. So we push a dummy value (`Eof` is cheapest to
162+
// construct) now, and overwrite it with the proper value below.
163+
let dummy = MatcherLoc::Eof;
164+
locs.push(dummy);
165+
166+
let next_metavar_orig = *next_metavar;
167+
let op = seq.kleene.op;
168+
let idx_first = locs.len();
169+
let idx_seq = idx_first - 1;
170+
inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1);
171+
172+
if let Some(separator) = &seq.separator {
173+
locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
174+
locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
175+
} else {
176+
locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
177+
}
178+
179+
// Overwrite the dummy value pushed above with the proper value.
180+
locs[idx_seq] = MatcherLoc::Sequence {
181+
op,
182+
num_metavar_decls: seq.num_captures,
183+
idx_first_after: locs.len(),
184+
next_metavar: next_metavar_orig,
185+
seq_depth,
186+
};
187+
}
188+
&TokenTree::MetaVarDecl(span, bind, kind) => {
189+
locs.push(MatcherLoc::MetaVarDecl {
190+
span,
191+
bind,
192+
kind,
193+
next_metavar: *next_metavar,
194+
seq_depth,
195+
});
196+
*next_metavar += 1;
197+
}
198+
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
199+
}
200+
}
201+
}
202+
203+
let mut locs = vec![];
204+
let mut next_metavar = 0;
205+
inner(sess, matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);
206+
207+
// A final entry is needed for eof.
208+
locs.push(MatcherLoc::Eof);
209+
210+
locs
211+
}
212+
141213
/// A single matcher position, representing the state of matching.
142214
struct MatcherPos {
143215
/// The index into `TtParser::locs`, which represents the "dot".
@@ -301,9 +373,6 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {
301373
pub struct TtParser {
302374
macro_name: Ident,
303375

304-
/// The matcher of the current rule.
305-
locs: Vec<MatcherLoc>,
306-
307376
/// The set of current mps to be processed. This should be empty by the end of a successful
308377
/// execution of `parse_tt_inner`.
309378
cur_mps: Vec<MatcherPos>,
@@ -324,92 +393,13 @@ impl TtParser {
324393
pub(super) fn new(macro_name: Ident) -> TtParser {
325394
TtParser {
326395
macro_name,
327-
locs: vec![],
328396
cur_mps: vec![],
329397
next_mps: vec![],
330398
bb_mps: vec![],
331399
empty_matches: Lrc::new(smallvec![]),
332400
}
333401
}
334402

335-
/// Convert a `&[TokenTree]` to a `&[MatcherLoc]`. Note: this conversion happens every time the
336-
/// macro is called, which may be many times if there are many call sites or if it is
337-
/// recursive. This conversion is fairly cheap and the representation is sufficiently better
338-
/// for matching than `&[TokenTree]` that it's a clear performance win even with the overhead.
339-
/// But it might be possible to move the conversion outwards so it only occurs once per macro.
340-
fn compute_locs(&mut self, sess: &ParseSess, matcher: &[TokenTree]) -> usize {
341-
fn inner(
342-
sess: &ParseSess,
343-
tts: &[TokenTree],
344-
locs: &mut Vec<MatcherLoc>,
345-
next_metavar: &mut usize,
346-
seq_depth: usize,
347-
) {
348-
for tt in tts {
349-
match tt {
350-
TokenTree::Token(token) => {
351-
locs.push(MatcherLoc::Token { token: token.clone() });
352-
}
353-
TokenTree::Delimited(_, delimited) => {
354-
locs.push(MatcherLoc::Delimited);
355-
inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth);
356-
}
357-
TokenTree::Sequence(_, seq) => {
358-
// We can't determine `idx_first_after` and construct the final
359-
// `MatcherLoc::Sequence` until after `inner()` is called and the sequence
360-
// end pieces are processed. So we push a dummy value (`Eof` is cheapest to
361-
// construct) now, and overwrite it with the proper value below.
362-
let dummy = MatcherLoc::Eof;
363-
locs.push(dummy);
364-
365-
let next_metavar_orig = *next_metavar;
366-
let op = seq.kleene.op;
367-
let idx_first = locs.len();
368-
let idx_seq = idx_first - 1;
369-
inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1);
370-
371-
if let Some(separator) = &seq.separator {
372-
locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
373-
locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
374-
} else {
375-
locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
376-
}
377-
378-
// Overwrite the dummy value pushed above with the proper value.
379-
locs[idx_seq] = MatcherLoc::Sequence {
380-
op,
381-
num_metavar_decls: seq.num_captures,
382-
idx_first_after: locs.len(),
383-
next_metavar: next_metavar_orig,
384-
seq_depth,
385-
};
386-
}
387-
&TokenTree::MetaVarDecl(span, bind, kind) => {
388-
locs.push(MatcherLoc::MetaVarDecl {
389-
span,
390-
bind,
391-
kind,
392-
next_metavar: *next_metavar,
393-
seq_depth,
394-
});
395-
*next_metavar += 1;
396-
}
397-
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
398-
}
399-
}
400-
}
401-
402-
self.locs.clear();
403-
let mut next_metavar = 0;
404-
inner(sess, matcher, &mut self.locs, &mut next_metavar, /* seq_depth */ 0);
405-
406-
// A final entry is needed for eof.
407-
self.locs.push(MatcherLoc::Eof);
408-
409-
// This is the number of metavar decls.
410-
next_metavar
411-
}
412-
413403
/// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
414404
/// produce more mps in `next_mps` and `bb_mps`.
415405
///
@@ -420,15 +410,15 @@ impl TtParser {
420410
fn parse_tt_inner(
421411
&mut self,
422412
sess: &ParseSess,
423-
num_metavar_decls: usize,
413+
matcher: &[MatcherLoc],
424414
token: &Token,
425415
) -> Option<NamedParseResult> {
426416
// Matcher positions that would be valid if the macro invocation was over now. Only
427417
// modified if `token == Eof`.
428418
let mut eof_mps = EofMatcherPositions::None;
429419

430420
while let Some(mut mp) = self.cur_mps.pop() {
431-
match &self.locs[mp.idx] {
421+
match &matcher[mp.idx] {
432422
MatcherLoc::Token { token: t } => {
433423
// If it's a doc comment, we just ignore it and move on to the next tt in the
434424
// matcher. This is a bug, but #95267 showed that existing programs rely on
@@ -536,7 +526,7 @@ impl TtParser {
536526
}
537527
MatcherLoc::Eof => {
538528
// We are past the matcher's end, and not in a sequence. Try to end things.
539-
debug_assert_eq!(mp.idx, self.locs.len() - 1);
529+
debug_assert_eq!(mp.idx, matcher.len() - 1);
540530
if *token == token::Eof {
541531
eof_mps = match eof_mps {
542532
EofMatcherPositions::None => EofMatcherPositions::One(mp),
@@ -554,11 +544,10 @@ impl TtParser {
554544
if *token == token::Eof {
555545
Some(match eof_mps {
556546
EofMatcherPositions::One(mut eof_mp) => {
557-
assert_eq!(eof_mp.matches.len(), num_metavar_decls);
558547
// Need to take ownership of the matches from within the `Lrc`.
559548
Lrc::make_mut(&mut eof_mp.matches);
560549
let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
561-
self.nameize(sess, matches)
550+
self.nameize(sess, matcher, matches)
562551
}
563552
EofMatcherPositions::Multiple => {
564553
Error(token.span, "ambiguity: multiple successful parses".to_string())
@@ -580,10 +569,8 @@ impl TtParser {
580569
pub(super) fn parse_tt(
581570
&mut self,
582571
parser: &mut Cow<'_, Parser<'_>>,
583-
matcher: &[TokenTree],
572+
matcher: &[MatcherLoc],
584573
) -> NamedParseResult {
585-
let num_metavar_decls = self.compute_locs(parser.sess, matcher);
586-
587574
// A queue of possible matcher positions. We initialize it with the matcher position in
588575
// which the "dot" is before the first token of the first token tree in `matcher`.
589576
// `parse_tt_inner` then processes all of these possible matcher positions and produces
@@ -598,7 +585,7 @@ impl TtParser {
598585

599586
// Process `cur_mps` until either we have finished the input or we need to get some
600587
// parsing from the black-box parser done.
601-
if let Some(res) = self.parse_tt_inner(&parser.sess, num_metavar_decls, &parser.token) {
588+
if let Some(res) = self.parse_tt_inner(&parser.sess, matcher, &parser.token) {
602589
return res;
603590
}
604591

@@ -626,7 +613,7 @@ impl TtParser {
626613
(0, 1) => {
627614
// We need to call the black-box parser to get some nonterminal.
628615
let mut mp = self.bb_mps.pop().unwrap();
629-
let loc = &self.locs[mp.idx];
616+
let loc = &matcher[mp.idx];
630617
if let &MatcherLoc::MetaVarDecl {
631618
span,
632619
kind: Some(kind),
@@ -664,19 +651,23 @@ impl TtParser {
664651

665652
(_, _) => {
666653
// Too many possibilities!
667-
return self.ambiguity_error(parser.token.span);
654+
return self.ambiguity_error(matcher, parser.token.span);
668655
}
669656
}
670657

671658
assert!(!self.cur_mps.is_empty());
672659
}
673660
}
674661

675-
fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult {
662+
fn ambiguity_error(
663+
&self,
664+
matcher: &[MatcherLoc],
665+
token_span: rustc_span::Span,
666+
) -> NamedParseResult {
676667
let nts = self
677668
.bb_mps
678669
.iter()
679-
.map(|mp| match &self.locs[mp.idx] {
670+
.map(|mp| match &matcher[mp.idx] {
680671
MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => {
681672
format!("{} ('{}')", kind, bind)
682673
}
@@ -702,12 +693,13 @@ impl TtParser {
702693
fn nameize<I: Iterator<Item = NamedMatch>>(
703694
&self,
704695
sess: &ParseSess,
696+
matcher: &[MatcherLoc],
705697
mut res: I,
706698
) -> NamedParseResult {
707699
// Make that each metavar has _exactly one_ binding. If so, insert the binding into the
708700
// `NamedParseResult`. Otherwise, it's an error.
709701
let mut ret_val = FxHashMap::default();
710-
for loc in self.locs.iter() {
702+
for loc in matcher {
711703
if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc {
712704
if kind.is_some() {
713705
match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {

‎compiler/rustc_expand/src/mbe/macro_rules.rs

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstF
44
use crate::mbe;
55
use crate::mbe::macro_check;
66
use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser};
7-
use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree};
7+
use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree, MatcherLoc};
88
use crate::mbe::transcribe::transcribe;
99

1010
use rustc_ast as ast;
@@ -159,7 +159,7 @@ struct MacroRulesMacroExpander {
159159
name: Ident,
160160
span: Span,
161161
transparency: Transparency,
162-
lhses: Vec<mbe::TokenTree>,
162+
lhses: Vec<Vec<MatcherLoc>>,
163163
rhses: Vec<mbe::TokenTree>,
164164
valid: bool,
165165
is_local: bool,
@@ -210,7 +210,7 @@ fn generic_extension<'cx, 'tt>(
210210
name: Ident,
211211
transparency: Transparency,
212212
arg: TokenStream,
213-
lhses: &'tt [mbe::TokenTree],
213+
lhses: &'tt [Vec<MatcherLoc>],
214214
rhses: &'tt [mbe::TokenTree],
215215
is_local: bool,
216216
) -> Box<dyn MacResult + 'cx> {
@@ -245,14 +245,6 @@ fn generic_extension<'cx, 'tt>(
245245
// this situation.)
246246
let parser = parser_from_cx(sess, arg.clone());
247247

248-
// A matcher is always delimited, but the delimiters are ignored.
249-
let delimited_inner_tts = |tt: &'tt mbe::TokenTree| -> &'tt [mbe::TokenTree] {
250-
match tt {
251-
mbe::TokenTree::Delimited(_, delimited) => delimited.inner_tts(),
252-
_ => cx.span_bug(sp, "malformed macro lhs"),
253-
}
254-
};
255-
256248
// Try each arm's matchers.
257249
let mut tt_parser = TtParser::new(name);
258250
for (i, lhs) in lhses.iter().enumerate() {
@@ -262,13 +254,19 @@ fn generic_extension<'cx, 'tt>(
262254
// are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
263255
let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut());
264256

265-
match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), delimited_inner_tts(lhs)) {
257+
match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
266258
Success(named_matches) => {
267259
// The matcher was `Success(..)`ful.
268260
// Merge the gated spans from parsing the matcher with the pre-existing ones.
269261
sess.gated_spans.merge(gated_spans_snapshot);
270262

271-
let rhs = delimited_inner_tts(&rhses[i]).to_vec().clone();
263+
// Ignore the delimiters on the RHS.
264+
let rhs = match &rhses[i] {
265+
mbe::TokenTree::Delimited(_, delimited) => {
266+
delimited.inner_tts().to_vec().clone()
267+
}
268+
_ => cx.span_bug(sp, "malformed macro rhs"),
269+
};
272270
let arm_span = rhses[i].span();
273271

274272
let rhs_spans = rhs.iter().map(|t| t.span()).collect::<Vec<_>>();
@@ -346,10 +344,8 @@ fn generic_extension<'cx, 'tt>(
346344
// Check whether there's a missing comma in this macro call, like `println!("{}" a);`
347345
if let Some((arg, comma_span)) = arg.add_comma() {
348346
for lhs in lhses {
349-
if let Success(_) = tt_parser.parse_tt(
350-
&mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())),
351-
delimited_inner_tts(lhs),
352-
) {
347+
let parser = parser_from_cx(sess, arg.clone());
348+
if let Success(_) = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
353349
if comma_span.is_dummy() {
354350
err.note("you might be missing a comma");
355351
} else {
@@ -440,6 +436,8 @@ pub fn compile_declarative_macro(
440436
}),
441437
),
442438
];
439+
// Convert it into `MatcherLoc` form.
440+
let argument_gram = mbe::macro_parser::compute_locs(&sess.parse_sess, &argument_gram);
443441

444442
let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS);
445443
let mut tt_parser = TtParser::new(def.ident);
@@ -536,6 +534,25 @@ pub fn compile_declarative_macro(
536534
None => {}
537535
}
538536

537+
// Convert the lhses into `MatcherLoc` form, which is better for doing the
538+
// actual matching. Unless the matcher is invalid.
539+
let lhses = if valid {
540+
lhses
541+
.iter()
542+
.map(|lhs| {
543+
// Ignore the delimiters around the matcher.
544+
match lhs {
545+
mbe::TokenTree::Delimited(_, delimited) => {
546+
mbe::macro_parser::compute_locs(&sess.parse_sess, delimited.inner_tts())
547+
}
548+
_ => sess.parse_sess.span_diagnostic.span_bug(def.span, "malformed macro lhs"),
549+
}
550+
})
551+
.collect()
552+
} else {
553+
vec![]
554+
};
555+
539556
mk_syn_ext(Box::new(MacroRulesMacroExpander {
540557
name: def.ident,
541558
span: def.span,

0 commit comments

Comments
 (0)
Please sign in to comment.