Skip to content

Commit 710417b

Browse files
committed
text/scanner: accept new Go2 number literals
This CL introduces text/scanner support for the new binary and octal integer literals, hexadecimal floats, and digit separators for all number literals. The new code is closely mirroring the respective code for number literals in cmd/compile/internal/syntax/scanner.go.

Uniformly use the term "invalid" rather than "illegal" in error messages to match the respective error messages in the other scanners directly.

R=Go1.13

Updates #12711.
Updates #19308.
Updates #28493.
Updates #29008.

Change-Id: I2f291de13ba5afc0e530cd8326e6bf4c3858ebac
Reviewed-on: https://go-review.googlesource.com/c/161199
Run-TryBot: Robert Griesemer <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 33ac854 commit 710417b

File tree

2 files changed

+371
-109
lines changed

2 files changed

+371
-109
lines changed

src/text/scanner/scanner.go

Lines changed: 174 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ func (s *Scanner) next() rune {
266266
s.srcPos += width
267267
s.lastCharLen = width
268268
s.column++
269-
s.error("illegal UTF-8 encoding")
269+
s.error("invalid UTF-8 encoding")
270270
return ch
271271
}
272272
}
@@ -281,7 +281,7 @@ func (s *Scanner) next() rune {
281281
switch ch {
282282
case 0:
283283
// for compatibility with other tools
284-
s.error("illegal character NUL")
284+
s.error("invalid character NUL")
285285
case '\n':
286286
s.line++
287287
s.lastLineLen = s.column
@@ -335,6 +335,10 @@ func (s *Scanner) error(msg string) {
335335
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
336336
}
337337

338+
func (s *Scanner) errorf(format string, args ...interface{}) {
339+
s.error(fmt.Sprintf(format, args...))
340+
}
341+
338342
func (s *Scanner) isIdentRune(ch rune, i int) bool {
339343
if s.IsIdentRune != nil {
340344
return s.IsIdentRune(ch, i)
@@ -351,95 +355,189 @@ func (s *Scanner) scanIdentifier() rune {
351355
return ch
352356
}
353357

354-
func digitVal(ch rune) int {
355-
switch {
356-
case '0' <= ch && ch <= '9':
357-
return int(ch - '0')
358-
case 'a' <= ch && ch <= 'f':
359-
return int(ch - 'a' + 10)
360-
case 'A' <= ch && ch <= 'F':
361-
return int(ch - 'A' + 10)
358+
func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter
359+
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
360+
func isHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }
361+
362+
// digits accepts the sequence { digit | '_' } starting with ch0.
363+
// If base <= 10, digits accepts any decimal digit but records
364+
// the first invalid digit >= base in *invalid if *invalid == 0.
365+
// digits returns the first rune that is not part of the sequence
366+
// anymore, and a bitset describing whether the sequence contained
367+
// digits (bit 0 is set), or separators '_' (bit 1 is set).
368+
func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int) {
369+
ch = ch0
370+
if base <= 10 {
371+
max := rune('0' + base)
372+
for isDecimal(ch) || ch == '_' {
373+
ds := 1
374+
if ch == '_' {
375+
ds = 2
376+
} else if ch >= max && *invalid == 0 {
377+
*invalid = ch
378+
}
379+
digsep |= ds
380+
ch = s.next()
381+
}
382+
} else {
383+
for isHex(ch) || ch == '_' {
384+
ds := 1
385+
if ch == '_' {
386+
ds = 2
387+
}
388+
digsep |= ds
389+
ch = s.next()
390+
}
362391
}
363-
return 16 // larger than any legal digit val
392+
return
364393
}
365394

366-
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
395+
func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
396+
base := 10 // number base
397+
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
398+
digsep := 0 // bit 0: digit present, bit 1: '_' present
399+
invalid := rune(0) // invalid digit in literal, or 0
400+
401+
// integer part
402+
var tok rune
403+
var ds int
404+
if integerPart {
405+
tok = Int
406+
if ch == '0' {
407+
ch = s.next()
408+
switch lower(ch) {
409+
case 'x':
410+
ch = s.next()
411+
base, prefix = 16, 'x'
412+
case 'o':
413+
ch = s.next()
414+
base, prefix = 8, 'o'
415+
case 'b':
416+
ch = s.next()
417+
base, prefix = 2, 'b'
418+
default:
419+
base, prefix = 8, '0'
420+
digsep = 1 // leading 0
421+
}
422+
}
423+
ch, ds = s.digits(ch, base, &invalid)
424+
digsep |= ds
425+
}
367426

368-
func (s *Scanner) scanMantissa(ch rune) rune {
369-
for isDecimal(ch) {
370-
ch = s.next()
427+
// fractional part
428+
if !integerPart || ch == '.' {
429+
tok = Float
430+
if prefix == 'o' || prefix == 'b' {
431+
s.error("invalid radix point in " + litname(prefix))
432+
}
433+
if ch == '.' {
434+
ch = s.next()
435+
}
436+
ch, ds = s.digits(ch, base, &invalid)
437+
digsep |= ds
371438
}
372-
return ch
373-
}
374439

375-
func (s *Scanner) scanFraction(ch rune) rune {
376-
if ch == '.' {
377-
ch = s.scanMantissa(s.next())
440+
if digsep&1 == 0 {
441+
s.error(litname(prefix) + " has no digits")
378442
}
379-
return ch
380-
}
381443

382-
func (s *Scanner) scanExponent(ch rune) rune {
383-
if ch == 'e' || ch == 'E' {
444+
// exponent
445+
if e := lower(ch); e == 'e' || e == 'p' {
446+
switch {
447+
case e == 'e' && prefix != 0 && prefix != '0':
448+
s.errorf("%q exponent requires decimal mantissa", ch)
449+
case e == 'p' && prefix != 'x':
450+
s.errorf("%q exponent requires hexadecimal mantissa", ch)
451+
}
384452
ch = s.next()
385-
if ch == '-' || ch == '+' {
453+
tok = Float
454+
if ch == '+' || ch == '-' {
386455
ch = s.next()
387456
}
388-
if !isDecimal(ch) {
389-
s.error("illegal exponent")
457+
ch, ds = s.digits(ch, 10, nil)
458+
digsep |= ds
459+
if ds&1 == 0 {
460+
s.error("exponent has no digits")
390461
}
391-
ch = s.scanMantissa(ch)
462+
} else if prefix == 'x' && tok == Float {
463+
s.error("hexadecimal mantissa requires a 'p' exponent")
392464
}
393-
return ch
465+
466+
if tok == Int && invalid != 0 {
467+
s.errorf("invalid digit %q in %s", invalid, litname(prefix))
468+
}
469+
470+
if digsep&2 != 0 {
471+
s.tokEnd = s.srcPos - s.lastCharLen // make sure token text is terminated
472+
if i := invalidSep(s.TokenText()); i >= 0 {
473+
s.error("'_' must separate successive digits")
474+
}
475+
}
476+
477+
return tok, ch
394478
}
395479

396-
func (s *Scanner) scanNumber(ch rune) (rune, rune) {
397-
// isDecimal(ch)
398-
if ch == '0' {
399-
// int or float
400-
ch = s.next()
401-
if ch == 'x' || ch == 'X' {
402-
// hexadecimal int
403-
ch = s.next()
404-
hasMantissa := false
405-
for digitVal(ch) < 16 {
406-
ch = s.next()
407-
hasMantissa = true
408-
}
409-
if !hasMantissa {
410-
s.error("illegal hexadecimal number")
411-
}
412-
} else {
413-
// octal int or float
414-
has8or9 := false
415-
for isDecimal(ch) {
416-
if ch > '7' {
417-
has8or9 = true
418-
}
419-
ch = s.next()
420-
}
421-
if s.Mode&ScanFloats != 0 && (ch == '.' || ch == 'e' || ch == 'E') {
422-
// float
423-
ch = s.scanFraction(ch)
424-
ch = s.scanExponent(ch)
425-
return Float, ch
480+
func litname(prefix rune) string {
481+
switch prefix {
482+
default:
483+
return "decimal literal"
484+
case 'x':
485+
return "hexadecimal literal"
486+
case 'o', '0':
487+
return "octal literal"
488+
case 'b':
489+
return "binary literal"
490+
}
491+
}
492+
493+
// invalidSep returns the index of the first invalid separator in x, or -1.
494+
func invalidSep(x string) int {
495+
x1 := ' ' // prefix char, we only care if it's 'x'
496+
d := '.' // digit, one of '_', '0' (a digit), or '.' (anything else)
497+
i := 0
498+
499+
// a prefix counts as a digit
500+
if len(x) >= 2 && x[0] == '0' {
501+
x1 = lower(rune(x[1]))
502+
if x1 == 'x' || x1 == 'o' || x1 == 'b' {
503+
d = '0'
504+
i = 2
505+
}
506+
}
507+
508+
// mantissa and exponent
509+
for ; i < len(x); i++ {
510+
p := d // previous digit
511+
d = rune(x[i])
512+
switch {
513+
case d == '_':
514+
if p != '0' {
515+
return i
426516
}
427-
// octal int
428-
if has8or9 {
429-
s.error("illegal octal number")
517+
case isDecimal(d) || x1 == 'x' && isHex(d):
518+
d = '0'
519+
default:
520+
if p == '_' {
521+
return i - 1
430522
}
523+
d = '.'
431524
}
432-
return Int, ch
433525
}
434-
// decimal int or float
435-
ch = s.scanMantissa(ch)
436-
if s.Mode&ScanFloats != 0 && (ch == '.' || ch == 'e' || ch == 'E') {
437-
// float
438-
ch = s.scanFraction(ch)
439-
ch = s.scanExponent(ch)
440-
return Float, ch
526+
if d == '_' {
527+
return len(x) - 1
528+
}
529+
530+
return -1
531+
}
532+
533+
func digitVal(ch rune) int {
534+
switch {
535+
case '0' <= ch && ch <= '9':
536+
return int(ch - '0')
537+
case 'a' <= lower(ch) && lower(ch) <= 'f':
538+
return int(lower(ch) - 'a' + 10)
441539
}
442-
return Int, ch
540+
return 16 // larger than any legal digit val
443541
}
444542

445543
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
@@ -448,7 +546,7 @@ func (s *Scanner) scanDigits(ch rune, base, n int) rune {
448546
n--
449547
}
450548
if n > 0 {
451-
s.error("illegal char escape")
549+
s.error("invalid char escape")
452550
}
453551
return ch
454552
}
@@ -468,7 +566,7 @@ func (s *Scanner) scanEscape(quote rune) rune {
468566
case 'U':
469567
ch = s.scanDigits(s.next(), 16, 8)
470568
default:
471-
s.error("illegal char escape")
569+
s.error("invalid char escape")
472570
}
473571
return ch
474572
}
@@ -503,7 +601,7 @@ func (s *Scanner) scanRawString() {
503601

504602
func (s *Scanner) scanChar() {
505603
if s.scanString('\'') != 1 {
506-
s.error("illegal char literal")
604+
s.error("invalid char literal")
507605
}
508606
}
509607

@@ -584,7 +682,7 @@ redo:
584682
}
585683
case isDecimal(ch):
586684
if s.Mode&(ScanInts|ScanFloats) != 0 {
587-
tok, ch = s.scanNumber(ch)
685+
tok, ch = s.scanNumber(ch, true)
588686
} else {
589687
ch = s.next()
590688
}
@@ -607,9 +705,7 @@ redo:
607705
case '.':
608706
ch = s.next()
609707
if isDecimal(ch) && s.Mode&ScanFloats != 0 {
610-
tok = Float
611-
ch = s.scanMantissa(ch)
612-
ch = s.scanExponent(ch)
708+
tok, ch = s.scanNumber(ch, false)
613709
}
614710
case '/':
615711
ch = s.next()

0 commit comments

Comments
 (0)