🐛⚡️♻️ NAMESPACE: fix parsing (not SP-delimited!)

nevans · nevans · commit 5f0805541091 · 2023-02-08T02:38:16.000-05:00
I misread or misunderstood the spec when I first implemented this...
I wrongly inserted SP-delimiters.  Most servers don't list more than one
namespace, so probably very few noticed the bug!

Also:
* ♻️ Rewrote using "new parser style" to more directly imitate the ABNF.
* ⚡️ Small but measurable performance improvement.
* ♻️ Add ParserUtils::Generator#def_token_matchers for quoted, string,
  nil, tagged_ext_label, etc.
* ♻️ Move atom, astring, nstring, etc to top, so they can be aliased.
* ♻️ Use NIL in nstring, nquoted
diff --git a/lib/net/imap/response_parser.rb b/lib/net/imap/response_parser.rb
@@ -222,24 +222,25 @@ def unescape_quoted(quoted)
 
       def_char_matchers :SP,   " ", :T_SPACE
 
+      def_char_matchers :lpar, "(", :T_LPAR
+      def_char_matchers :rpar, ")", :T_RPAR
+
       def_char_matchers :lbra, "[", :T_LBRA
       def_char_matchers :rbra, "]", :T_RBRA
 
-      # atom            = 1*ATOM-CHAR
-      #
-      # TODO: match atom entirely by regexp (in the "lexer")
-      def atom; -combine_adjacent(*ATOM_TOKENS) end
+      def_token_matchers :quoted, T_QUOTED
 
-      # the #accept version of #atom
-      def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
+      #   string          = quoted / literal
+      def_token_matchers :string,  T_QUOTED, T_LITERAL
 
-      # Returns <tt>atom.upcase</tt>
-      def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
+      # use where string represents "LABEL" values
+      def_token_matchers :case_insensitive_string,
+                         T_QUOTED, T_LITERAL,
+                         send: :upcase
 
-      # Returns <tt>atom?&.upcase</tt>
-      def case_insensitive__atom?
-        -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
-      end
+      # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
+      # NIL? returns nil when it does *not* match
+      def_token_matchers :NIL, T_NIL
 
       # In addition to explicitly uses of +tagged-ext-label+, use this to match
       # keywords when the grammar has not provided any extension syntax.
@@ -254,8 +255,47 @@ def case_insensitive__atom?
       #   tagged-label-char   = tagged-label-fchar / DIGIT / ":"
       #
       # TODO: add to lexer and only match tagged-ext-label
-      alias tagged_ext_label  case_insensitive__atom
-      alias tagged_ext_label? case_insensitive__atom?
+      def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
+
+      # atom            = 1*ATOM-CHAR
+      # ATOM-CHAR       = <any CHAR except atom-specials>
+      ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS].freeze
+
+      # ASTRING-CHAR    = ATOM-CHAR / resp-specials
+      # resp-specials   = "]"
+      ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
+
+      ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
+
+      # atom            = 1*ATOM-CHAR
+      #
+      # TODO: match atom entirely by regexp (in the "lexer")
+      def atom; -combine_adjacent(*ATOM_TOKENS) end
+
+      # the #accept version of #atom
+      def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
+
+      # Returns <tt>atom.upcase</tt>
+      def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
+
+      # Returns <tt>atom?&.upcase</tt>
+      def case_insensitive__atom?
+        -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
+      end
+
+      # TODO: handle astring_chars entirely inside the lexer
+      def astring_chars
+        combine_adjacent(*ASTRING_CHARS_TOKENS)
+      end
+
+      #   astring         = 1*ASTRING-CHAR / string
+      def astring
+        lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
+      end
+
+      def astring?
+        lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
+      end
 
       # Use #label or #label_in to assert specific known labels
       # (+tagged-ext-label+ only, not +atom+).
@@ -264,6 +304,15 @@ def label(word)
         parse_error("unexpected atom %p, expected %p instead", val, word)
       end
 
+      #   nstring         = string / nil
+      def nstring
+        NIL? ? nil : string
+      end
+
+      def nquoted
+        NIL? ? nil : quoted
+      end
+
       def response
         token = lookahead
         case token.symbol
@@ -1198,65 +1247,56 @@ def id_response
         end
       end
 
+      # namespace-response = "NAMESPACE" SP namespace
+      #                       SP namespace SP namespace
+      #                  ; The first Namespace is the Personal Namespace(s).
+      #                  ; The second Namespace is the Other Users'
+      #                  ; Namespace(s).
+      #                  ; The third Namespace is the Shared Namespace(s).
       def namespace_response
+        name = label("NAMESPACE")
         @lex_state = EXPR_DATA
-        token = lookahead
-        token = match(T_ATOM)
-        name = token.value.upcase
-        match(T_SPACE)
-        personal = namespaces
-        match(T_SPACE)
-        other = namespaces
-        match(T_SPACE)
-        shared = namespaces
+        data = Namespaces.new((SP!; namespace),
+                              (SP!; namespace),
+                              (SP!; namespace))
+        UntaggedResponse.new(name, data, @str)
+      ensure
         @lex_state = EXPR_BEG
-        data = Namespaces.new(personal, other, shared)
-        return UntaggedResponse.new(name, data, @str)
-      end
-
-      def namespaces
-        token = lookahead
-        # empty () is not allowed, so nil is functionally identical to empty.
-        data = []
-        if token.symbol == T_NIL
-          shift_token
-        else
-          match(T_LPAR)
-          loop do
-            data << namespace
-            break unless lookahead.symbol == T_SPACE
-            shift_token
-          end
-          match(T_RPAR)
-        end
-        data
       end
 
+      # namespace         = nil / "(" 1*namespace-descr ")"
       def namespace
-        match(T_LPAR)
-        prefix = match(T_QUOTED, T_LITERAL).value
-        match(T_SPACE)
-        delimiter = string
+        NIL? and return []
+        lpar
+        list = [namespace_descr]
+        list << namespace_descr until rpar?
+        list
+      end
+
+      # namespace-descr   = "(" string SP
+      #                        (DQUOTE QUOTED-CHAR DQUOTE / nil)
+      #                         [namespace-response-extensions] ")"
+      def namespace_descr
+        lpar
+        prefix     = string; SP!
+        delimiter  = nquoted # n.b: should only accept single char
         extensions = namespace_response_extensions
-        match(T_RPAR)
+        rpar
         Namespace.new(prefix, delimiter, extensions)
       end
 
+      # namespace-response-extensions = *namespace-response-extension
+      # namespace-response-extension = SP string SP
+      #                   "(" string *(SP string) ")"
       def namespace_response_extensions
         data = {}
-        token = lookahead
-        if token.symbol == T_SPACE
-          shift_token
-          name = match(T_QUOTED, T_LITERAL).value
+        while SP?
+          name = string; SP!
+          lpar
           data[name] ||= []
-          match(T_SPACE)
-          match(T_LPAR)
-          loop do
-            data[name].push match(T_QUOTED, T_LITERAL).value
-            break unless lookahead.symbol == T_SPACE
-            shift_token
-          end
-          match(T_RPAR)
+          data[name] << string
+          data[name] << string while SP?
+          rpar
         end
         data
       end
@@ -1459,80 +1499,6 @@ def flag_list
         end
       end
 
-      def nstring
-        token = lookahead
-        if token.symbol == T_NIL
-          shift_token
-          return nil
-        else
-          return string
-        end
-      end
-
-      def astring
-        token = lookahead
-        if string_token?(token)
-          return string
-        else
-          return astring_chars
-        end
-      end
-
-      def string
-        token = lookahead
-        if token.symbol == T_NIL
-          shift_token
-          return nil
-        end
-        token = match(T_QUOTED, T_LITERAL)
-        return token.value
-      end
-
-      STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
-
-      def string_token?(token)
-        return STRING_TOKENS.include?(token.symbol)
-      end
-
-      def case_insensitive_string
-        token = lookahead
-        if token.symbol == T_NIL
-          shift_token
-          return nil
-        end
-        token = match(T_QUOTED, T_LITERAL)
-        return token.value.upcase
-      end
-
-      # atom            = 1*ATOM-CHAR
-      # ATOM-CHAR       = <any CHAR except atom-specials>
-      ATOM_TOKENS = [
-        T_ATOM,
-        T_NUMBER,
-        T_NIL,
-        T_LBRA,
-        T_PLUS
-      ]
-
-      # ASTRING-CHAR    = ATOM-CHAR / resp-specials
-      # resp-specials   = "]"
-      ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
-
-      def astring_chars
-        combine_adjacent(*ASTRING_CHARS_TOKENS)
-      end
-
-      def combine_adjacent(*tokens)
-        result = "".b
-        while token = accept(*tokens)
-          result << token.value
-        end
-        if result.empty?
-          parse_error('unexpected token %s (expected %s)',
-                      lookahead.symbol, args.join(" or "))
-        end
-        result
-      end
 
       # See https://www.rfc-editor.org/errata/rfc3501
       #
diff --git a/lib/net/imap/response_parser/parser_utils.rb b/lib/net/imap/response_parser/parser_utils.rb
@@ -47,10 +47,68 @@ def #{match_name}
             RUBY
           end
 
+          # TODO: move coercion to the token.value method?
+          def def_token_matchers(name, *token_symbols, coerce: nil, send: nil)
+            match_name = name.match(/\A[A-Z]/) ? "#{name}!" : name
+
+            if token_symbols.size == 1
+              token   = token_symbols.first
+              matcher = "token&.symbol == %p" % [token]
+              desc    = token
+            else
+              matcher = "%p.include? token&.symbol" % [token_symbols]
+              desc    = token_symbols.join(" or ")
+            end
+
+            value = "(token.value)"
+            value = coerce.to_s + value   if coerce
+            value = [value, send].join(".") if send
+
+            raise_parse_error = <<~RUBY
+              parse_error("unexpected %s (expected #{desc})", token&.symbol)
+            RUBY
+
+            class_eval <<~RUBY, __FILE__, __LINE__ + 1
+              # frozen_string_literal: true
+
+              def #{name}?
+                token = #{LOOKAHEAD}
+                if #{matcher}
+                  #{SHIFT_TOKEN}
+                  #{value}
+                end
+              end
+
+              def #{match_name}
+                token = #{LOOKAHEAD}
+                if #{matcher}
+                  #{SHIFT_TOKEN}
+                  #{value}
+                else
+                  #{raise_parse_error}
+                end
+              end
+            RUBY
+          end
+
         end
 
         private
 
+        # TODO: after checking the lookahead, use a regexp for remaining chars.
+        # That way a loop isn't needed.
+        def combine_adjacent(*tokens)
+          result = "".b
+          while token = accept(*tokens)
+            result << token.value
+          end
+          if result.empty?
+            parse_error('unexpected token %s (expected %s)',
+                        lookahead.symbol, tokens.join(" or "))
+          end
+          result
+        end
+
         def match(*args)
           token = lookahead
           unless args.include?(token.symbol)
diff --git a/test/net/imap/fixtures/response_parser/namespace_responses.yml b/test/net/imap/fixtures/response_parser/namespace_responses.yml
@@ -49,9 +49,7 @@
       raw_data: *rfc2342_ex5_3
 
   NAMESPACE_rfc2342_example_5.4:
-    # WARNING: this example is wrong and will be fixed soon...
-    :response: &rfc2342_ex5_4 "* NAMESPACE ((\"\" \"/\")) ((\"~\" \"/\")) ((\"#shared/\" \"/\") (\"#public/\"
-      \"/\") (\"#ftp/\" \"/\") (\"#news.\" \".\"))\r\n"
+    :response: &rfc2342_ex5_4 "* NAMESPACE ((\"\" \"/\")) ((\"~\" \"/\")) ((\"#shared/\" \"/\")(\"#public/\" \"/\")(\"#ftp/\" \"/\")(\"#news.\" \".\"))\r\n"
     :expected: !ruby/struct:Net::IMAP::UntaggedResponse
       name: NAMESPACE
       data: !ruby/struct:Net::IMAP::Namespaces
@@ -100,7 +98,7 @@
       raw_data: *rfc2342_ex5_5
 
   NAMESPACE_rfc2342_example_5.6:
-    :response: &rfc2342_ex5_6 "* NAMESPACE ((\"\" \"/\") (\"#mh/\" \"/\" \"X-PARAM\" (\"FLAG1\" \"FLAG2\")))
+    :response: &rfc2342_ex5_6 "* NAMESPACE ((\"\" \"/\")(\"#mh/\" \"/\" \"X-PARAM\" (\"FLAG1\" \"FLAG2\")))
       NIL NIL\r\n"
     :expected: !ruby/struct:Net::IMAP::UntaggedResponse
       name: NAMESPACE