ruby · schneems · Nov 6, 2020 · Nov 5, 2020 · Nov 5, 2020 · Nov 6, 2020
diff --git a/lib/syntax_error_search.rb b/lib/syntax_error_search.rb
@@ -14,13 +14,72 @@ def self.indent(string)
     end
   end
 
+  # This will tell you if the `code_lines` would be valid
+  # if you removed the `without_lines`. In short it's a
+  # way to detect if we've found the lines with syntax errors
+  # in our document yet.
+  #
+  #   code_lines = [
+  #     CodeLine.new(line: "def foo\n",   index: 0),
+  #     CodeLine.new(line: "  def bar\n", index: 1),
+  #     CodeLine.new(line: "end\n",       index: 2)
+  #   ]
+  #
+  #   SyntaxErrorSearch.valid_without?(
+  #     without_lines: code_lines[1],
+  #     code_lines: code_lines
+  #   )                                    # => true
+  #
+  #   SyntaxErrorSearch.valid?(code_lines) # => false
+  def self.valid_without?(without_lines: , code_lines:)
+    # `Array(...).flatten` lets callers pass a single line, an array of
+    # lines, or nested arrays of lines (e.g. one array per code block)
+    lines = code_lines - Array(without_lines).flatten
 
+    # With every line removed there is nothing left to parse, so the
+    # (empty) remainder is treated as valid
+    if lines.empty?
+      return true
+    else
+      return valid?(lines)
+    end
+  end
+
+  # Returns truthy if a given input source is valid syntax
+  #
+  #   SyntaxErrorSearch.valid?(<<~EOM) # => true
+  #     def foo
+  #     end
+  #   EOM
+  #
+  #   SyntaxErrorSearch.valid?(<<~EOM) # => false
+  #     def foo
+  #       def bar # Syntax error here
+  #     end
+  #   EOM
+  #
+  # You can also pass in an array of lines and they'll be
+  # joined before evaluating
+  #
+  #   SyntaxErrorSearch.valid?(
+  #     [
+  #       "def foo\n",
+  #       "end\n"
+  #     ]
+  #   ) # => true
+  #
+  #   SyntaxErrorSearch.valid?(
+  #     [
+  #       "def foo\n",
+  #       "  def bar\n", # Syntax error here
+  #       "end\n"
+  #     ]
+  #   ) # => false
+  #
+  # As an FYI the CodeLine class instances respond to `to_s`
+  # so passing a CodeLine in as an object or as an array
+  # will convert it to its code representation.
   def self.valid?(source)
     source = source.join if source.is_a?(Array)
     source = source.to_s
 
     # Parser writes to stderr even if you catch the error
-    #
     stderr = $stderr
     $stderr = StringIO.new
 
@@ -37,3 +96,4 @@ def self.valid?(source)
 require_relative "syntax_error_search/code_block"
 require_relative "syntax_error_search/code_frontier"
 require_relative "syntax_error_search/code_search"
+require_relative "syntax_error_search/display_invalid_blocks"
diff --git a/lib/syntax_error_search/code_frontier.rb b/lib/syntax_error_search/code_frontier.rb
@@ -1,5 +1,143 @@
 module SyntaxErrorSearch
   # This class is responsible for generating, storing, and sorting code blocks
+  #
+  # The search algorithm for finding our syntax errors isn't in this class, but
+  # this class holds the bulk of the logic for generating, storing, detecting
+  # and filtering invalid code.
+  #
+  # This is loosely based on the idea of a "frontier" for searching for a path
+  # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
+  #
+  # In this case our path is going from code with a syntax error to code without a
+  # syntax error. We're currently doing that by evaluating individual lines
+  # with respect to indentation and other whitespace (empty lines). As represented
+  # by individual "code blocks".
+  #
+  # This class does not just store the frontier that we're searching, but is responsible
+  # for generating new code blocks as well. This is not ideal, but the state of generating
+  # and evaluating paths i.e. codeblocks is very tightly coupled.
+  #
+  # ## Creation
+  #
+  # This example code is re-used in the other sections
+  #
+  # Example:
+  #
+  #   code_lines = [
+  #     CodeLine.new(line: "def cinco\n", index: 0),
+  #     CodeLine.new(line: "  def dog\n", index: 1), # Syntax error 1
+  #     CodeLine.new(line: "  def cat\n", index: 2), # Syntax error 2
+  #     CodeLine.new(line: "end\n",       index: 3)
+  #   ]
+  #
+  #   frontier = CodeFrontier.new(code_lines: code_lines)
+  #
+  #   frontier << frontier.next_block if frontier.next_block?
+  #   frontier << frontier.next_block if frontier.next_block?
+  #
+  #   frontier.holds_all_syntax_errors? # => true
+  #   block = frontier.pop
+  #   frontier.holds_all_syntax_errors? # => false
+  #   frontier << block
+  #   frontier.holds_all_syntax_errors? # => true
+  #
+  #   frontier.detect_invalid_blocks.map(&:to_s) # =>
+  #   [
+  #     "def dog\n",
+  #     "def cat\n"
+  #   ]
+  #
+  # ## Block Generation
+  #
+  # Currently code blocks are generated based off of indentation. With the idea that blocks are,
+  # well, indented. Once a code block is generated, added to the frontier, or expanded,
+  # we also need to remove its lines from our generation code so we don't generate the same block
+  # twice by accident.
+  #
+  # Block generation is currently done internally via the "indent_hash", starting at the largest
+  # (innermost) indentation.
+  #
+  # Example:
+  #
+  #   ```
+  #   def river
+  #     puts "lol" # <=== Start looking here and expand outwards
+  #   end
+  #   ```
+  #
+  # Generating new code blocks is a little verbose but looks like this:
+  #
+  #   frontier << frontier.next_block if frontier.next_block?
+  #
+  # Once a block is in the frontier, it can be popped off:
+  #
+  #   frontier.pop
+  #   # => <# CodeBlock >
+  #
+  # ## Block (frontier) storage, ordering and retrieval
+  #
+  # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
+  # The array is sorted by indentation order, so that when a block is popped off the array, the one with
+  # the largest current indentation is evaluated first.
+  #
+  # For example, if we have these two blocks in the frontier:
+  #
+  #   ```
+  #   # Block A - 0 spaces for indentation
+  #
+  #   def cinco
+  #     puts "lol"
+  #   end
+  #   ```
+  #
+  #   ```
+  #   # Block B - 2 spaces for indentation
+  #
+  #     def river
+  #       puts "hehe"
+  #     end
+  #   ```
+  #
+  # The "Block B" has more current indentation, so it would be evaluated first.
+  #
+  # ## Frontier evaluation (Find the syntax error)
+  #
+  # Another key difference between this and a normal search "frontier" is that we're not checking if
+  # an individual code block meets the goal (turning invalid code to valid code) since there can
+  # be multiple syntax errors and this will require multiple code blocks. To handle this, we're
+  # evaluating all the contents of the frontier at the same time to see if the solution exists in any
+  # of our search blocks.
+  #
+  #   # Using the previously generated frontier
+  #
+  #   frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
+  #   frontier.holds_all_syntax_errors? # => false
+  #
+  #   frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
+  #   frontier.holds_all_syntax_errors? # => true
+  #
+  # ## Detect invalid blocks (Filter for smallest solution)
+  #
+  # After we prove that a solution exists and we've found it to be in our frontier, we can stop searching.
+  # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
+  # of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
+  #
+  #   # Using the previously generated frontier
+  #
+  #   frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
+  #   frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
+  #   frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
+  #   frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
+  #
+  #   frontier.count # => 4
+  #   frontier.detect_invalid_blocks.length # => 2
+  #   frontier.detect_invalid_blocks.map(&:to_s) # =>
+  #   [
+  #     "def dog\n",
+  #     "def cat\n"
+  #   ]
+  #
+  # Once invalid blocks are found and filtered, then they can be passed to a formatter.
   class CodeFrontier
     def initialize(code_lines: )
       @code_lines = code_lines
@@ -13,33 +151,36 @@ def initialize(code_lines: )
       end
     end
 
+    # Number of code blocks currently held in the frontier
+    def count
+      @frontier.count
+    end
+
     # Returns true if the document is valid with all lines
     # removed. By default it checks all blocks in present in
     # the frontier array, but can be used for arbitrary arrays
     # of codeblocks as well
     def holds_all_syntax_errors?(block_array = @frontier)
-      lines = @code_lines
-      block_array.each do |block|
-        lines -= block.lines
+      # Builds one array of lines per block; the nesting is flattened
+      # inside `SyntaxErrorSearch.valid_without?`
+      without_lines = block_array.map do |block|
+        block.lines
       end
 
-      return true if lines.empty?
-
-      CodeBlock.new(
-        code_lines: @code_lines,
-        lines: lines
-      ).valid?
+      SyntaxErrorSearch.valid_without?(
+        without_lines: without_lines,
+        code_lines: @code_lines
+      )
     end
 
     # Returns a code block with the largest indentation possible
     def pop
       return nil if empty?
 
-      self << next_block unless @indent_hash.empty?
-
+      # No block generation happens here: callers push `next_block`
+      # themselves (guarded by `next_block?`) before popping
       return @frontier.pop
     end
 
+    # True while `@indent_hash` still has entries, which is what
+    # `next_block` reads from to build the next block
+    def next_block?
+      !@indent_hash.empty?
+    end
+
     def next_block
       indent = @indent_hash.keys.sort.last
       lines = @indent_hash[indent].first

diff --git a/lib/syntax_error_search/code_search.rb b/lib/syntax_error_search/code_search.rb
@@ -1,4 +1,27 @@
 module SyntaxErrorSearch
+  # Searches code for a syntax error
+  #
+  # The bulk of the heavy lifting is done by the CodeFrontier
+  #
+  # The flow looks like this:
+  #
+  # ## Syntax error detection
+  #
+  # When the frontier holds the syntax error, we can stop searching
+  #
+  #
+  #   search = CodeSearch.new(<<~EOM)
+  #     def dog
+  #       def lol
+  #     end
+  #   EOM
+  #
+  #   search.call
+  #
+  #   search.invalid_blocks.map(&:to_s)
+  #   # => ["def lol\n"]
+  #
+  #
   class CodeSearch
     private; attr_reader :frontier; public
     public; attr_reader :invalid_blocks
@@ -13,6 +36,8 @@ def initialize(string)
 
     def call
       until frontier.holds_all_syntax_errors?
+        frontier << frontier.next_block if frontier.next_block?
+
         block = frontier.pop
 
         if block.valid?

diff --git a/lib/syntax_error_search/display_invalid_blocks.rb b/lib/syntax_error_search/display_invalid_blocks.rb
@@ -0,0 +1,63 @@
+module SyntaxErrorSearch
+  # Used for formatting invalid blocks
+  #
+  # Writes a human readable report to `io`: a banner explaining the
+  # error, the filename (when given) and every non-hidden line of the
+  # document prefixed with its line number. Lines that belong to one of
+  # the invalid blocks are prefixed with an ANSI bold/italic escape code.
+  class DisplayInvalidBlocks
+    attr_reader :filename
+
+    # block_array - blocks to report; each must respond to `lines`, and
+    #               the first one must also respond to `code_lines`
+    # io          - object that receives the report via `puts`
+    #               (defaults to `$stderr`)
+    # filename    - optional name printed in the report when present
+    #
+    # NOTE(review): with an empty `block_array` both `@lines.last` and
+    # `@blocks.first` are nil, so this raises NoMethodError - confirm
+    # callers never pass an empty array.
+    #
+    # NOTE(review): `@digit_count` is taken from the last *invalid* line,
+    # but `code_with_lines` numbers every line in `@code_lines`. Line
+    # numbers with more digits than that will not be padded - confirm
+    # this is intended.
+    def initialize(block_array, io: $stderr, filename: nil)
+      @filename = filename
+      @io = io
+      @blocks = block_array
+      @lines = @blocks.map(&:lines).flatten
+      @digit_count = @lines.last.line_number.to_s.length
+      @code_lines = @blocks.first.code_lines
+
+      # Lookup table keyed by line, used to decide which lines to highlight
+      @invalid_line_hash = @lines.each_with_object({}) {|line, h| h[line] = true}
+    end
+
+    # Writes the report to `@io`: banner first, then the filename if one
+    # was given, then the fenced and indented code listing
+    def call
+      @io.puts <<~EOM
+
+        SyntaxErrorSearch: A syntax error was detected
+
+        This code has an unmatched `end` this is caused by either
+        missing a syntax keyword (`def`,  `do`, etc.) or inclusion
+        of an extra `end` line
+
+      EOM
+      @io.puts("file: #{filename}") if filename
+      @io.puts <<~EOM
+        simplified:
+
+        #{code_with_filename(indent: 2)}
+      EOM
+    end
+
+
+    # Returns the numbered code listing wrapped in a ``` fence, with
+    # every line of the result prefixed by `indent` spaces.
+    #
+    # NOTE(review): despite the name, the filename is not part of the
+    # output; the line that added it is commented out below.
+    def code_with_filename(indent: 0)
+      string = String.new("")
+      string << "```\n"
+      # string << "#".rjust(@digit_count) + " filename: #{filename}\n\n" if filename
+      string << code_with_lines
+      string << "```\n"
+
+      string.each_line.map {|l| " " * indent + l }.join
+    end
+
+    # Returns one string holding every non-hidden line of `@code_lines`,
+    # each prefixed with its right-justified line number
+    def code_with_lines
+      @code_lines.map do |line|
+        # `next` yields nil for hidden lines; `join` renders nil as ""
+        next if line.hidden?
+        number = line.line_number.to_s.rjust(@digit_count)
+        if line.empty?
+          # Lines reporting `empty?` get the number followed directly by the line
+          "#{number.to_s}#{line}"
+        else
+          string = String.new
+          string << "\e[1;3m" if @invalid_line_hash[line] # Bold, italics
+          string << "#{number.to_s}  "
+          string << line.to_s
+          # The reset code is appended to every non-empty line, highlighted or not
+          string << "\e[0m"
+          string
+        end
+      end.join
+    end
+  end
+end