@@ -266,7 +266,7 @@ func (s *Scanner) next() rune {
266
266
s .srcPos += width
267
267
s .lastCharLen = width
268
268
s .column ++
269
- s .error ("illegal UTF-8 encoding" )
269
+ s .error ("invalid UTF-8 encoding" )
270
270
return ch
271
271
}
272
272
}
@@ -281,7 +281,7 @@ func (s *Scanner) next() rune {
281
281
switch ch {
282
282
case 0 :
283
283
// for compatibility with other tools
284
- s .error ("illegal character NUL" )
284
+ s .error ("invalid character NUL" )
285
285
case '\n' :
286
286
s .line ++
287
287
s .lastLineLen = s .column
@@ -335,6 +335,10 @@ func (s *Scanner) error(msg string) {
335
335
fmt .Fprintf (os .Stderr , "%s: %s\n " , pos , msg )
336
336
}
337
337
338
+ func (s * Scanner ) errorf (format string , args ... interface {}) {
339
+ s .error (fmt .Sprintf (format , args ... ))
340
+ }
341
+
338
342
func (s * Scanner ) isIdentRune (ch rune , i int ) bool {
339
343
if s .IsIdentRune != nil {
340
344
return s .IsIdentRune (ch , i )
@@ -351,95 +355,189 @@ func (s *Scanner) scanIdentifier() rune {
351
355
return ch
352
356
}
353
357
354
- func digitVal (ch rune ) int {
355
- switch {
356
- case '0' <= ch && ch <= '9' :
357
- return int (ch - '0' )
358
- case 'a' <= ch && ch <= 'f' :
359
- return int (ch - 'a' + 10 )
360
- case 'A' <= ch && ch <= 'F' :
361
- return int (ch - 'A' + 10 )
358
// lower sets the ASCII case bit (0x20) of ch. For an ASCII letter this
// yields the lower-case letter; the result is meaningful as a letter only
// if ch is an ASCII letter.
func lower(ch rune) rune {
	return ch | 0x20
}
359
// isDecimal reports whether ch is an ASCII decimal digit ('0' through '9').
func isDecimal(ch rune) bool {
	return ch >= '0' && ch <= '9'
}
360
// isHex reports whether ch is an ASCII hexadecimal digit: '0' through '9',
// or a letter 'a' through 'f' in either case.
func isHex(ch rune) bool {
	if ch >= '0' && ch <= '9' {
		return true
	}
	letter := ch | 0x20 // fold an ASCII upper-case letter to lower case
	return letter >= 'a' && letter <= 'f'
}
361
+
362
+ // digits accepts the sequence { digit | '_' } starting with ch0.
363
+ // If base <= 10, digits accepts any decimal digit but records
364
+ // the first invalid digit >= base in *invalid if *invalid == 0.
365
+ // digits returns the first rune that is not part of the sequence
366
+ // anymore, and a bitset describing whether the sequence contained
367
+ // digits (bit 0 is set), or separators '_' (bit 1 is set).
368
+ func (s * Scanner ) digits (ch0 rune , base int , invalid * rune ) (ch rune , digsep int ) {
369
+ ch = ch0
370
+ if base <= 10 {
371
+ max := rune ('0' + base )
372
+ for isDecimal (ch ) || ch == '_' {
373
+ ds := 1
374
+ if ch == '_' {
375
+ ds = 2
376
+ } else if ch >= max && * invalid == 0 {
377
+ * invalid = ch
378
+ }
379
+ digsep |= ds
380
+ ch = s .next ()
381
+ }
382
+ } else {
383
+ for isHex (ch ) || ch == '_' {
384
+ ds := 1
385
+ if ch == '_' {
386
+ ds = 2
387
+ }
388
+ digsep |= ds
389
+ ch = s .next ()
390
+ }
362
391
}
363
- return 16 // larger than any legal digit val
392
+ return
364
393
}
365
394
366
- func isDecimal (ch rune ) bool { return '0' <= ch && ch <= '9' }
395
+ func (s * Scanner ) scanNumber (ch rune , integerPart bool ) (rune , rune ) {
396
+ base := 10 // number base
397
+ prefix := rune (0 ) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
398
+ digsep := 0 // bit 0: digit present, bit 1: '_' present
399
+ invalid := rune (0 ) // invalid digit in literal, or 0
400
+
401
+ // integer part
402
+ var tok rune
403
+ var ds int
404
+ if integerPart {
405
+ tok = Int
406
+ if ch == '0' {
407
+ ch = s .next ()
408
+ switch lower (ch ) {
409
+ case 'x' :
410
+ ch = s .next ()
411
+ base , prefix = 16 , 'x'
412
+ case 'o' :
413
+ ch = s .next ()
414
+ base , prefix = 8 , 'o'
415
+ case 'b' :
416
+ ch = s .next ()
417
+ base , prefix = 2 , 'b'
418
+ default :
419
+ base , prefix = 8 , '0'
420
+ digsep = 1 // leading 0
421
+ }
422
+ }
423
+ ch , ds = s .digits (ch , base , & invalid )
424
+ digsep |= ds
425
+ }
367
426
368
- func (s * Scanner ) scanMantissa (ch rune ) rune {
369
- for isDecimal (ch ) {
370
- ch = s .next ()
427
+ // fractional part
428
+ if ! integerPart || ch == '.' {
429
+ tok = Float
430
+ if prefix == 'o' || prefix == 'b' {
431
+ s .error ("invalid radix point in " + litname (prefix ))
432
+ }
433
+ if ch == '.' {
434
+ ch = s .next ()
435
+ }
436
+ ch , ds = s .digits (ch , base , & invalid )
437
+ digsep |= ds
371
438
}
372
- return ch
373
- }
374
439
375
- func (s * Scanner ) scanFraction (ch rune ) rune {
376
- if ch == '.' {
377
- ch = s .scanMantissa (s .next ())
440
+ if digsep & 1 == 0 {
441
+ s .error (litname (prefix ) + " has no digits" )
378
442
}
379
- return ch
380
- }
381
443
382
- func (s * Scanner ) scanExponent (ch rune ) rune {
383
- if ch == 'e' || ch == 'E' {
444
+ // exponent
445
+ if e := lower (ch ); e == 'e' || e == 'p' {
446
+ switch {
447
+ case e == 'e' && prefix != 0 && prefix != '0' :
448
+ s .errorf ("%q exponent requires decimal mantissa" , ch )
449
+ case e == 'p' && prefix != 'x' :
450
+ s .errorf ("%q exponent requires hexadecimal mantissa" , ch )
451
+ }
384
452
ch = s .next ()
385
- if ch == '-' || ch == '+' {
453
+ tok = Float
454
+ if ch == '+' || ch == '-' {
386
455
ch = s .next ()
387
456
}
388
- if ! isDecimal (ch ) {
389
- s .error ("illegal exponent" )
457
+ ch , ds = s .digits (ch , 10 , nil )
458
+ digsep |= ds
459
+ if ds & 1 == 0 {
460
+ s .error ("exponent has no digits" )
390
461
}
391
- ch = s .scanMantissa (ch )
462
+ } else if prefix == 'x' && tok == Float {
463
+ s .error ("hexadecimal mantissa requires a 'p' exponent" )
392
464
}
393
- return ch
465
+
466
+ if tok == Int && invalid != 0 {
467
+ s .errorf ("invalid digit %q in %s" , invalid , litname (prefix ))
468
+ }
469
+
470
+ if digsep & 2 != 0 {
471
+ s .tokEnd = s .srcPos - s .lastCharLen // make sure token text is terminated
472
+ if i := invalidSep (s .TokenText ()); i >= 0 {
473
+ s .error ("'_' must separate successive digits" )
474
+ }
475
+ }
476
+
477
+ return tok , ch
394
478
}
395
479
396
- func (s * Scanner ) scanNumber (ch rune ) (rune , rune ) {
397
- // isDecimal(ch)
398
- if ch == '0' {
399
- // int or float
400
- ch = s .next ()
401
- if ch == 'x' || ch == 'X' {
402
- // hexadecimal int
403
- ch = s .next ()
404
- hasMantissa := false
405
- for digitVal (ch ) < 16 {
406
- ch = s .next ()
407
- hasMantissa = true
408
- }
409
- if ! hasMantissa {
410
- s .error ("illegal hexadecimal number" )
411
- }
412
- } else {
413
- // octal int or float
414
- has8or9 := false
415
- for isDecimal (ch ) {
416
- if ch > '7' {
417
- has8or9 = true
418
- }
419
- ch = s .next ()
420
- }
421
- if s .Mode & ScanFloats != 0 && (ch == '.' || ch == 'e' || ch == 'E' ) {
422
- // float
423
- ch = s .scanFraction (ch )
424
- ch = s .scanExponent (ch )
425
- return Float , ch
480
// litname returns the human-readable name, as used in error messages, of
// the kind of literal introduced by prefix: 'x' for hexadecimal, 'o' or
// '0' for octal, 'b' for binary. Any other value names a decimal literal.
func litname(prefix rune) string {
	if prefix == 'x' {
		return "hexadecimal literal"
	}
	if prefix == 'o' || prefix == '0' {
		return "octal literal"
	}
	if prefix == 'b' {
		return "binary literal"
	}
	return "decimal literal"
}
492
+
493
// invalidSep returns the index of the first invalid '_' separator in the
// number literal text x, or -1 if every separator is valid.
//
// A separator is valid only if it sits between two digits, where a base
// prefix (0x, 0o, 0b in either case) counts as a digit on its left. The
// letters a-f count as digits only in a literal with a 0x prefix.
func invalidSep(x string) int {
	// Classification of the previously seen character.
	const (
		digit = '0' // a digit, or the base prefix
		sep   = '_' // a separator
		other = '.' // anything else (radix point, exponent, sign, ...)
	)

	prev := rune(other)
	hex := false
	start := 0

	// A base prefix counts as a digit.
	if len(x) >= 2 && x[0] == '0' {
		switch x[1] | 0x20 { // fold ASCII case
		case 'x':
			hex = true
			prev, start = digit, 2
		case 'o', 'b':
			prev, start = digit, 2
		}
	}

	// Mantissa and exponent.
	for pos := start; pos < len(x); pos++ {
		c := rune(x[pos])
		isDigit := '0' <= c && c <= '9' || hex && 'a' <= c|0x20 && c|0x20 <= 'f'
		switch {
		case c == sep:
			if prev != digit {
				return pos // separator not preceded by a digit
			}
			prev = sep
		case isDigit:
			prev = digit
		default:
			if prev == sep {
				return pos - 1 // separator not followed by a digit
			}
			prev = other
		}
	}

	// A trailing separator has no digit after it.
	if prev == sep {
		return len(x) - 1
	}
	return -1
}
532
+
533
// digitVal returns the numeric value of the hexadecimal digit ch (0-15,
// letters accepted in either case), or 16 if ch is not a hexadecimal digit.
func digitVal(ch rune) int {
	if '0' <= ch && ch <= '9' {
		return int(ch - '0')
	}
	// Fold an ASCII upper-case letter to lower case before range-checking.
	if letter := ch | ('a' - 'A'); 'a' <= letter && letter <= 'f' {
		return int(letter-'a') + 10
	}
	return 16 // larger than any legal digit val
}
444
542
445
543
func (s * Scanner ) scanDigits (ch rune , base , n int ) rune {
@@ -448,7 +546,7 @@ func (s *Scanner) scanDigits(ch rune, base, n int) rune {
448
546
n --
449
547
}
450
548
if n > 0 {
451
- s .error ("illegal char escape" )
549
+ s .error ("invalid char escape" )
452
550
}
453
551
return ch
454
552
}
@@ -468,7 +566,7 @@ func (s *Scanner) scanEscape(quote rune) rune {
468
566
case 'U' :
469
567
ch = s .scanDigits (s .next (), 16 , 8 )
470
568
default :
471
- s .error ("illegal char escape" )
569
+ s .error ("invalid char escape" )
472
570
}
473
571
return ch
474
572
}
@@ -503,7 +601,7 @@ func (s *Scanner) scanRawString() {
503
601
504
602
func (s * Scanner ) scanChar () {
505
603
if s .scanString ('\'' ) != 1 {
506
- s .error ("illegal char literal" )
604
+ s .error ("invalid char literal" )
507
605
}
508
606
}
509
607
@@ -584,7 +682,7 @@ redo:
584
682
}
585
683
case isDecimal (ch ):
586
684
if s .Mode & (ScanInts | ScanFloats ) != 0 {
587
- tok , ch = s .scanNumber (ch )
685
+ tok , ch = s .scanNumber (ch , true )
588
686
} else {
589
687
ch = s .next ()
590
688
}
@@ -607,9 +705,7 @@ redo:
607
705
case '.' :
608
706
ch = s .next ()
609
707
if isDecimal (ch ) && s .Mode & ScanFloats != 0 {
610
- tok = Float
611
- ch = s .scanMantissa (ch )
612
- ch = s .scanExponent (ch )
708
+ tok , ch = s .scanNumber (ch , false )
613
709
}
614
710
case '/' :
615
711
ch = s .next ()
0 commit comments