@@ -106,6 +106,8 @@ def initialize(content, options)
106
106
@rests = [ ]
107
107
@seek = 0
108
108
109
+ @heredoc_queue = [ ]
110
+
109
111
@indent = 0
110
112
@indent_stack = [ ]
111
113
@lex_state = :EXPR_BEG
@@ -464,21 +466,43 @@ def lex_init()
464
466
465
467
@OP . def_rule ( "\n " ) do |op , io |
466
468
print "\\ n\n " if RDoc ::RubyLex . debug?
467
- case @lex_state
468
- when :EXPR_BEG , :EXPR_FNAME , :EXPR_DOT
469
- @continue = true
470
- else
471
- @continue = false
472
- @lex_state = :EXPR_BEG
473
- until ( @indent_stack . empty? ||
474
- [ TkLPAREN , TkLBRACK , TkLBRACE ,
475
- TkfLPAREN , TkfLBRACK , TkfLBRACE ] . include? ( @indent_stack . last ) )
476
- @indent_stack . pop
469
+ unless @heredoc_queue . empty?
470
+ info = @heredoc_queue [ 0 ]
471
+ if !info [ :started ] # "\n"
472
+ info [ :started ] = true
473
+ ungetc "\n "
474
+ elsif info [ :heredoc_end ] . nil? # heredoc body
475
+ tk , heredoc_end = identify_here_document_body ( info [ :quoted ] , info [ :lt ] , info [ :indent ] )
476
+ info [ :heredoc_end ] = heredoc_end
477
+ ungetc "\n "
478
+ else # heredoc end
479
+ @heredoc_queue . shift
480
+ @lex_state = :EXPR_BEG
481
+ tk = Token ( TkHEREDOCEND , info [ :heredoc_end ] )
482
+ if !@heredoc_queue . empty?
483
+ @heredoc_queue [ 0 ] [ :started ] = true
484
+ ungetc "\n "
485
+ end
477
486
end
478
487
end
479
- @current_readed = @readed
480
- @here_readed . clear
481
- Token ( TkNL )
488
+ unless tk
489
+ case @lex_state
490
+ when :EXPR_BEG , :EXPR_FNAME , :EXPR_DOT
491
+ @continue = true
492
+ else
493
+ @continue = false
494
+ @lex_state = :EXPR_BEG
495
+ until ( @indent_stack . empty? ||
496
+ [ TkLPAREN , TkLBRACK , TkLBRACE ,
497
+ TkfLPAREN , TkfLBRACK , TkfLBRACE ] . include? ( @indent_stack . last ) )
498
+ @indent_stack . pop
499
+ end
500
+ end
501
+ @current_readed = @readed
502
+ @here_readed . clear
503
+ tk = Token ( TkNL )
504
+ end
505
+ tk
482
506
end
483
507
484
508
@OP . def_rules ( "=" ) do
@@ -509,6 +533,12 @@ def lex_init()
509
533
tk
510
534
end
511
535
536
+ @OP . def_rules ( "->" ) do
537
+ |op , io |
538
+ @lex_state = :EXPR_ENDFN
539
+ Token ( op )
540
+ end
541
+
512
542
@OP . def_rules ( "!" , "!=" , "!~" ) do
513
543
|op , io |
514
544
case @lex_state
@@ -527,8 +557,8 @@ def lex_init()
527
557
if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS &&
528
558
( @lex_state != :EXPR_ARG || @space_seen )
529
559
c = peek ( 0 )
530
- if /\S / =~ c && ( /["'`]/ =~ c || /\w / =~ c || c == "-" )
531
- tk = identify_here_document
560
+ if /\S / =~ c && ( /["'`]/ =~ c || /\w / =~ c || c == "-" || c == "~" )
561
+ tk = identify_here_document ( op )
532
562
end
533
563
end
534
564
unless tk
@@ -837,14 +867,11 @@ def lex_int2
837
867
838
868
@OP . def_rule ( '\\' ) do
839
869
|op , io |
840
- if getc == "\n "
870
+ if peek ( 0 ) == "\n "
841
871
@space_seen = true
842
872
@continue = true
843
- Token ( TkSPACE )
844
- else
845
- ungetc
846
- Token ( "\\ " )
847
873
end
874
+ Token ( "\\ " )
848
875
end
849
876
850
877
@OP . def_rule ( '%' ) do
@@ -1053,7 +1080,11 @@ def identify_identifier
1053
1080
end
1054
1081
1055
1082
if token [ 0 , 1 ] =~ /[A-Z]/
1056
- return Token ( TkCONSTANT , token )
1083
+ if token [ -1 ] =~ /[!?]/
1084
+ return Token ( TkIDENTIFIER , token )
1085
+ else
1086
+ return Token ( TkCONSTANT , token )
1087
+ end
1057
1088
elsif token [ token . size - 1 , 1 ] =~ /[!?]/
1058
1089
return Token ( TkFID , token )
1059
1090
else
@@ -1066,77 +1097,63 @@ def identify_identifier
1066
1097
end
1067
1098
end
1068
1099
1069
- def identify_here_document
1100
##
# Lexes the opening tag of a here document that follows the +op+ operator
# (the caller passes the operator text it has already consumed).
#
# Reads the optional <tt>-</tt> or <tt>~</tt> indent marker and then the
# identifier, which is either wrapped in <tt>'</tt>, <tt>"</tt> or
# <tt>`</tt>, or a bare run of word characters.  The body is not read here:
# a description of the pending document is appended to <tt>@heredoc_queue</tt>
# with <tt>started: false</tt>.
#
# Sets <tt>@lex_state</tt> to <tt>:EXPR_BEG</tt> and returns a TkHEREDOCBEG
# token whose text is the full opening tag (for example <tt><<~EOS</tt>).

def identify_here_document(op)
  ch = getc
  # Work on a copy: appending to +op+ itself would modify the caller's
  # string and raises FrozenError when that string is frozen.
  start_token = op.dup

  # +indent+ deliberately stays nil unless an indent marker is present.
  if ch == "-" || ch == "~"
    start_token << ch
    ch = getc
    indent = true
  end

  if /['"`]/ =~ ch
    # Quoted identifier: collect everything up to the matching quote.
    lt = ch
    quoted = ""
    while (c = getc) && c != lt
      quoted << c
    end
    start_token << lt << quoted << lt
  else
    # Bare identifier: treated like a double-quoted here document.
    lt = '"'
    quoted = ch.dup
    while (c = getc) && c =~ /\w/
      quoted << c
    end
    start_token << quoted
    ungetc # give back the character that ended the identifier
  end

  @heredoc_queue << {
    quoted: quoted,
    lt: lt,
    indent: indent,
    started: false
  }
  @lex_state = :EXPR_BEG
  Token(RDoc::RubyLex::TkHEREDOCBEG, start_token)
end
1104
1138
1105
- if output_heredoc then
1106
- doc = '<<'
1107
- doc << '-' if indent
1108
- doc << "#{ user_quote } #{ quoted } #{ user_quote } \n "
1109
- else
1110
- doc = '"'
1111
- end
1139
##
# Reads the body of a here document line by line with +gets+ until the
# terminator is found.
#
# quoted:: the terminator identifier to look for
# lt::     literal type stored in <tt>@ltype</tt> while the body is read
# indent:: when true the terminator may be surrounded by whitespace (the
#          line is compared after +strip+); otherwise the line must equal
#          the identifier apart from its line ending
#
# Returns a two element array: the TkHEREDOC token holding the body text
# and the terminator line.  Line endings are normalized from CRLF to LF.
#
# Raises Error when input ends before the terminator is seen.  In that
# case <tt>@ltype</tt> is left set to +lt+; it is only restored on success.

def identify_here_document_body(quoted, lt, indent)
  ltback, @ltype = @ltype, lt

  doc = ""
  heredoc_end = nil
  while (l = gets)
    # Normalize CRLF to LF.  The group must be non-capturing "(?:...)";
    # written as "(:?\r)?" it also swallowed a colon preceding the "\r".
    l = l.sub(/(?:\r)?\n\z/, "\n")
    if (indent ? l.strip : l.chomp) == quoted
      heredoc_end = l
      break
    end
    doc << l
  end
  raise Error, "Missing terminating #{quoted} for string" unless heredoc_end

  @ltype = ltback
  @lex_state = :EXPR_BEG
  [Token(RDoc::RubyLex::TkHEREDOC, doc), heredoc_end]
end
1141
1158
1142
1159
def identify_quotation
@@ -1163,7 +1180,7 @@ def identify_number(op = "")
1163
1180
1164
1181
num = op
1165
1182
1166
- if peek ( 0 ) == "0" && peek ( 1 ) !~ /[.eE ]/
1183
+ if peek ( 0 ) == "0" && peek ( 1 ) !~ /[.eEri ]/
1167
1184
num << getc
1168
1185
1169
1186
case peek ( 0 )
@@ -1292,7 +1309,7 @@ def identify_string(ltype, quoted = ltype, type = nil)
1292
1309
str = if ltype == quoted and %w[ " ' / ` ] . include? ltype then
1293
1310
ltype . dup
1294
1311
else
1295
- "%#{ type or PERCENT_LTYPE . key ltype } #{ PERCENT_PAREN_REV [ quoted ] ||quoted } "
1312
+ "%#{ type } #{ PERCENT_PAREN_REV [ quoted ] ||quoted } "
1296
1313
end
1297
1314
1298
1315
subtype = nil
0 commit comments