cmd/asm: factor out line parsing from assembling

aclements · aclements · commit 52b222055908 · 2018-11-12T20:46:25.000Z
Currently cmd/asm's Parser.line both consumes a line of assembly from the lexer and assembles it. This CL separates these two steps so that the line parser can be reused for purposes other than generating a Prog stream. For #27539. Updates #17544. Change-Id: I452c9a2112fbcc1c94bf909efc0d1fcc71014812 Reviewed-on: https://go-review.googlesource.com/c/147097 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/asm/internal/asm/line_test.go b/src/cmd/asm/internal/asm/line_test.go
@@ -38,8 +38,7 @@ func testBadInstParser(t *testing.T, goarch string, tests []badInstTest) {
 		parser := NewParser(ctxt, arch, tokenizer)
 
 		err := tryParse(t, func() {
-			parser.start(lex.Tokenize(test.input))
-			parser.line()
+			parser.Parse()
 		})
 
 		switch {
diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go
@@ -91,7 +91,23 @@ func (p *Parser) pos() src.XPos {
 }
 
 func (p *Parser) Parse() (*obj.Prog, bool) {
-	for p.line() {
+	scratch := make([][]lex.Token, 0, 3)
+	for {
+		word, cond, operands, ok := p.line(scratch)
+		if !ok {
+			break
+		}
+		scratch = operands
+
+		if p.pseudo(word, operands) {
+			continue
+		}
+		i, present := p.arch.Instructions[word]
+		if present {
+			p.instruction(i, word, cond, operands)
+			continue
+		}
+		p.errorf("unrecognized instruction %q", word)
 	}
 	if p.errorCount > 0 {
 		return nil, false
@@ -100,8 +116,17 @@ func (p *Parser) Parse() (*obj.Prog, bool) {
 	return p.firstProg, true
 }
 
-// WORD [ arg {, arg} ] (';' | '\n')
-func (p *Parser) line() bool {
+// line consumes a single assembly line from p.lex of the form
+//
+//   {label:} WORD[.cond] [ arg {, arg} ] (';' | '\n')
+//
+// It adds any labels to p.pendingLabels and returns the word, cond,
+// operand list, and true. At EOF or an unrecoverable error it returns
+// ok=false; some errors are only reported via p.errorf (ok stays true).
+//
+// line may reuse the memory from scratch.
+func (p *Parser) line(scratch [][]lex.Token) (word, cond string, operands [][]lex.Token, ok bool) {
+next:
 	// Skip newlines.
 	var tok lex.ScanToken
 	for {
@@ -114,24 +139,29 @@ func (p *Parser) line() bool {
 		case '\n', ';':
 			continue
 		case scanner.EOF:
-			return false
+			return "", "", nil, false
 		}
 		break
 	}
 	// First item must be an identifier.
 	if tok != scanner.Ident {
 		p.errorf("expected identifier, found %q", p.lex.Text())
-		return false // Might as well stop now.
+		return "", "", nil, false // Might as well stop now.
 	}
-	word := p.lex.Text()
-	var cond string
-	operands := make([][]lex.Token, 0, 3)
+	word, cond = p.lex.Text(), ""
+	operands = scratch[:0]
 	// Zero or more comma-separated operands, one per loop.
 	nesting := 0
 	colon := -1
 	for tok != '\n' && tok != ';' {
 		// Process one operand.
-		items := make([]lex.Token, 0, 3)
+		var items []lex.Token
+		if cap(operands) > len(operands) {
+			// Reuse scratch items slice.
+			items = operands[:cap(operands)][len(operands)][:0]
+		} else {
+			items = make([]lex.Token, 0, 3)
+		}
 		for {
 			tok = p.lex.Next()
 			if len(operands) == 0 && len(items) == 0 {
@@ -148,12 +178,12 @@ func (p *Parser) line() bool {
 				if tok == ':' {
 					// Labels.
 					p.pendingLabels = append(p.pendingLabels, word)
-					return true
+					goto next
 				}
 			}
 			if tok == scanner.EOF {
 				p.errorf("unexpected EOF")
-				return false
+				return "", "", nil, false
 			}
 			// Split operands on comma. Also, the old syntax on x86 for a "register pair"
 			// was AX:DX, for which the new syntax is DX, AX. Note the reordering.
@@ -162,7 +192,7 @@ func (p *Parser) line() bool {
 					// Remember this location so we can swap the operands below.
 					if colon >= 0 {
 						p.errorf("invalid ':' in operand")
-						return true
+						return word, cond, operands, true
 					}
 					colon = len(operands)
 				}
@@ -188,16 +218,7 @@ func (p *Parser) line() bool {
 			p.errorf("missing operand")
 		}
 	}
-	if p.pseudo(word, operands) {
-		return true
-	}
-	i, present := p.arch.Instructions[word]
-	if present {
-		p.instruction(i, word, cond, operands)
-		return true
-	}
-	p.errorf("unrecognized instruction %q", word)
-	return true
+	return word, cond, operands, true
 }
 
 func (p *Parser) instruction(op obj.As, word, cond string, operands [][]lex.Token) {