ruby · hsbt · Aug 8, 2017 · Aug 2, 2017 · Aug 3, 2017
diff --git a/lib/rdoc/ruby_lex.rb b/lib/rdoc/ruby_lex.rb
@@ -106,6 +106,8 @@ def initialize(content, options)
     @rests = []
     @seek = 0
 
+    @heredoc_queue = []
+
     @indent = 0
     @indent_stack = []
     @lex_state = :EXPR_BEG
@@ -462,21 +464,43 @@ def lex_init()
 
     @OP.def_rule("\n") do |op, io|
       print "\\n\n" if RDoc::RubyLex.debug?
-      case @lex_state
-      when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT
-        @continue = true
-      else
-        @continue = false
-        @lex_state = :EXPR_BEG
-        until (@indent_stack.empty? ||
-               [TkLPAREN, TkLBRACK, TkLBRACE,
-                 TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
-          @indent_stack.pop
+      unless @heredoc_queue.empty?
+        info = @heredoc_queue[0]
+        if !info[:started] # "\n"
+          info[:started] = true
+          ungetc "\n"
+        elsif info[:heredoc_end].nil? # heredoc body
+          tk, heredoc_end = identify_here_document_body(info[:quoted], info[:lt], info[:indent])
+          info[:heredoc_end] = heredoc_end
+          ungetc "\n"
+        else # heredoc end
+          @heredoc_queue.shift
+          @lex_state = :EXPR_BEG
+          tk = Token(TkHEREDOCEND, info[:heredoc_end])
+          if !@heredoc_queue.empty?
+            @heredoc_queue[0][:started] = true
+            ungetc "\n"
+          end
         end
       end
-      @current_readed = @readed
-      @here_readed.clear
-      Token(TkNL)
+      unless tk
+        case @lex_state
+        when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT
+          @continue = true
+        else
+          @continue = false
+          @lex_state = :EXPR_BEG
+          until (@indent_stack.empty? ||
+                 [TkLPAREN, TkLBRACK, TkLBRACE,
+                   TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
+            @indent_stack.pop
+          end
+        end
+        @current_readed = @readed
+        @here_readed.clear
+        tk = Token(TkNL)
+      end
+      tk
     end
 
     @OP.def_rules("*", "**",
@@ -506,8 +530,8 @@ def lex_init()
       if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS &&
          (@lex_state != :EXPR_ARG || @space_seen)
         c = peek(0)
-        if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
-          tk = identify_here_document
+        if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-" || c == "~")
+          tk = identify_here_document(op)
         end
       end
       unless tk
@@ -985,77 +1009,63 @@ def identify_identifier
     end
   end
 
-  def identify_here_document
+  # Lexes a heredoc opener (<<ID, <<-ID, <<~ID, bare or quoted), queues its body for the "\n" rule, returns TkHEREDOCBEG.
+  def identify_here_document(op)
     ch = getc
+    start_token = op.dup # copy: the concat calls below must not mutate the caller's operator string
     #    if lt = PERCENT_LTYPE[ch]
-    if ch == "-"
+    if ch == "-" || ch == "~"
+      start_token.concat ch
       ch = getc
       indent = true
     end
     if /['"`]/ =~ ch
-      user_quote = lt = ch
+      start_token.concat ch
+      lt = ch
       quoted = ""
       while (c = getc) && c != lt
         quoted.concat c
       end
+      start_token.concat quoted
+      start_token.concat lt
     else
-      user_quote = nil
       lt = '"'
       quoted = ch.dup
       while (c = getc) && c =~ /\w/
         quoted.concat c
       end
+      start_token.concat quoted
       ungetc
     end
 
-    ltback, @ltype = @ltype, lt
-    reserve = []
-    while ch = getc
-      reserve.push ch
-      if ch == "\\"
-        reserve.push ch = getc
-      elsif ch == "\n"
-        break
-      end
-    end
-
-    output_heredoc = reserve.join =~ /\A\r?\n\z/
+    @heredoc_queue << {
+      quoted: quoted,
+      lt: lt,
+      indent: indent,
+      started: false
+    }
+    @lex_state = :EXPR_BEG
+    Token(RDoc::RubyLex::TkHEREDOCBEG, start_token)
+  end
 
-    if output_heredoc then
-      doc = '<<'
-      doc << '-' if indent
-      doc << "#{user_quote}#{quoted}#{user_quote}\n"
-    else
-      doc = '"'
-    end
+  def identify_here_document_body(quoted, lt, indent) # returns [TkHEREDOC token, terminator line]; raises Error at EOF
+    ltback, @ltype = @ltype, lt
 
-    @current_readed = @readed
+    doc = ""
+    heredoc_end = nil
     while l = gets
-      l = l.sub(/(:?\r)?\n\z/, "\n")
+      l = l.sub(/\r?\n\z/, "\n") # normalize CRLF to LF; the old (:?\r) group also swallowed a trailing ':'
       if (indent ? l.strip : l.chomp) == quoted
+        heredoc_end = l
         break
       end
       doc << l
     end
+    raise Error, "Missing terminating #{quoted} for string" unless heredoc_end
 
-    if output_heredoc then
-      raise Error, "Missing terminating #{quoted} for string" unless l
-
-      doc << l.chomp
-    else
-      doc << '"'
-    end
-
-    @current_readed = @here_readed
-    @here_readed.concat reserve
-    while ch = reserve.pop
-      ungetc ch
-    end
-
-    token_class = output_heredoc ? RDoc::RubyLex::TkHEREDOC : Ltype2Token[lt]
     @ltype = ltback
-    @lex_state = :EXPR_END
-    Token(token_class, doc)
+    @lex_state = :EXPR_BEG
+    [Token(RDoc::RubyLex::TkHEREDOC, doc), heredoc_end]
   end
 
   def identify_quotation

diff --git a/lib/rdoc/ruby_token.rb b/lib/rdoc/ruby_token.rb
@@ -329,6 +329,8 @@ def Token(token, value = nil)
     [:TkCVAR,       TkId],
     [:TkIVAR,       TkId],
     [:TkCONSTANT,   TkId],
+    [:TkHEREDOCBEG, TkId],
+    [:TkHEREDOCEND, TkId],
 
     [:TkINTEGER,    TkVal],
     [:TkFLOAT,      TkVal],

diff --git a/test/test_rdoc_ruby_lex.rb b/test/test_rdoc_ruby_lex.rb
@@ -89,9 +89,11 @@ def x
       @TK::TkIDENTIFIER.new( 4, 1,  4, 'x'),
       @TK::TkNL        .new( 5, 1,  5, "\n"),
       @TK::TkSPACE     .new( 6, 2,  0, '  '),
-      @TK::TkHEREDOC   .new( 8, 2,  2,
-                            %Q{<<E\nLine 1\nLine 2\nE}),
-      @TK::TkNL        .new(27, 5, 28, "\n"),
+
+      @TK::TkHEREDOCBEG.new( 8, 2,  2, '<<E'),
+      @TK::TkNL        .new(11, 2,  6, "\n"),
+      @TK::TkHEREDOC   .new(11, 2,  6, "Line 1\nLine 2\n"),
+      @TK::TkHEREDOCEND.new(27, 5, 26, "E\n"),
       @TK::TkEND       .new(28, 6,  0, 'end'),
       @TK::TkNL        .new(31, 6, 28, "\n"),
     ]
@@ -147,10 +149,12 @@ def test_class_tokenize_heredoc_CR_NL
       @TK::TkSPACE     .new( 6, 1,  6, ' '),
       @TK::TkASSIGN    .new( 7, 1,  7, '='),
       @TK::TkSPACE     .new( 8, 1,  8, ' '),
-      @TK::TkHEREDOC   .new( 9, 1,  9,
-                            %Q{<<-STRING\nLine 1\nLine 2\n  STRING}),
-      @TK::TkSPACE     .new(44, 4, 45, "\r"),
-      @TK::TkNL        .new(45, 4, 46, "\n"),
+      @TK::TkHEREDOCBEG.new( 9, 1,  9, '<<-STRING'),
+      @TK::TkSPACE     .new(18, 1, 18, "\r"),
+      @TK::TkNL        .new(19, 1, 19, "\n"),
+      @TK::TkHEREDOC   .new(19, 1, 19,
+                            %Q{Line 1\nLine 2\n}),
+      @TK::TkHEREDOCEND.new(45, 4, 36, "  STRING\n"),
     ]
 
     assert_equal expected, tokens
@@ -169,10 +173,12 @@ def test_class_tokenize_heredoc_call
       @TK::TkSPACE     .new( 6, 1,  6, ' '),
       @TK::TkASSIGN    .new( 7, 1,  7, '='),
       @TK::TkSPACE     .new( 8, 1,  8, ' '),
-      @TK::TkSTRING    .new( 9, 1,  9, %Q{"Line 1\nLine 2\n"}),
-      @TK::TkDOT       .new(41, 4, 42, '.'),
-      @TK::TkIDENTIFIER.new(42, 4, 43, 'chomp'),
-      @TK::TkNL        .new(47, 4, 48, "\n"),
+      @TK::TkHEREDOCBEG.new( 9, 1,  9, '<<-STRING'),
+      @TK::TkDOT       .new(18, 1, 18, '.'),
+      @TK::TkIDENTIFIER.new(19, 1, 19, 'chomp'),
+      @TK::TkNL        .new(24, 1, 24, "\n"),
+      @TK::TkHEREDOC   .new(24, 1, 24, "Line 1\nLine 2\n"),
+      @TK::TkHEREDOCEND.new(47, 4, 39, "  STRING\n"),
     ]
 
     assert_equal expected, tokens
@@ -191,9 +197,12 @@ def test_class_tokenize_heredoc_indent
       @TK::TkSPACE     .new( 6, 1,  6, ' '),
       @TK::TkASSIGN    .new( 7, 1,  7, '='),
       @TK::TkSPACE     .new( 8, 1,  8, ' '),
-      @TK::TkHEREDOC   .new( 9, 1,  9,
-                            %Q{<<-STRING\nLine 1\nLine 2\n  STRING}),
-      @TK::TkNL        .new(41, 4, 42, "\n"),
+
+
+      @TK::TkHEREDOCBEG.new( 9, 1,  9, '<<-STRING'),
+      @TK::TkNL        .new(18, 1, 18, "\n"),
+      @TK::TkHEREDOC   .new(18, 1, 18, "Line 1\nLine 2\n"),
+      @TK::TkHEREDOCEND.new(41, 4, 33, "  STRING\n")
     ]
 
     assert_equal expected, tokens
@@ -223,8 +232,10 @@ def test_class_tokenize_heredoc_percent_N
       @TK::TkSPACE     .new( 1, 1,  1, ' '),
       @TK::TkIDENTIFIER.new( 2, 1,  2, 'b'),
       @TK::TkSPACE     .new( 3, 1,  3, ' '),
-      @TK::TkHEREDOC   .new( 4, 1,  4, %Q{<<-U\n%N\nU}),
-      @TK::TkNL        .new(13, 3, 14, "\n"),
+      @TK::TkHEREDOCBEG.new( 4, 1,  4, '<<-U'),
+      @TK::TkNL        .new( 8, 1,  8, "\n"),
+      @TK::TkHEREDOC   .new( 8, 1,  8, "%N\n"),
+      @TK::TkHEREDOCEND.new(13, 3, 12, "U\n")
     ]
 
     assert_equal expected, tokens