diff --git a/lib/net/imap/response_parser.rb b/lib/net/imap/response_parser.rb index de884b4a..73288c30 100644 --- a/lib/net/imap/response_parser.rb +++ b/lib/net/imap/response_parser.rb @@ -222,24 +222,25 @@ def unescape_quoted(quoted) def_char_matchers :SP, " ", :T_SPACE + def_char_matchers :lpar, "(", :T_LPAR + def_char_matchers :rpar, ")", :T_RPAR + def_char_matchers :lbra, "[", :T_LBRA def_char_matchers :rbra, "]", :T_RBRA - # atom = 1*ATOM-CHAR - # - # TODO: match atom entirely by regexp (in the "lexer") - def atom; -combine_adjacent(*ATOM_TOKENS) end + def_token_matchers :quoted, T_QUOTED - # the #accept version of #atom - def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end + # string = quoted / literal + def_token_matchers :string, T_QUOTED, T_LITERAL - # Returns atom.upcase - def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end + # use where string represents "LABEL" values (the matched value is upcased) + def_token_matchers :case_insensitive_string, + T_QUOTED, T_LITERAL, + send: :upcase - # Returns atom?&.upcase - def case_insensitive__atom? - -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS) - end + # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success. + # NIL? returns nil when it does *not* match + def_token_matchers :NIL, T_NIL # In addition to explicitly uses of +tagged-ext-label+, use this to match # keywords when the grammar has not provided any extension syntax. @@ -254,8 +255,47 @@ def case_insensitive__atom? # tagged-label-char = tagged-label-fchar / DIGIT / ":" # # TODO: add to lexer and only match tagged-ext-label - alias tagged_ext_label case_insensitive__atom - alias tagged_ext_label? case_insensitive__atom? 
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase + + # atom = 1*ATOM-CHAR + # ATOM-CHAR = <any CHAR except atom-specials> + ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS] + + # ASTRING-CHAR = ATOM-CHAR / resp-specials + # resp-specials = "]" + ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze + + ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze + + # atom = 1*ATOM-CHAR + # + # TODO: match atom entirely by regexp (in the "lexer") + def atom; -combine_adjacent(*ATOM_TOKENS) end + + # the #accept version of #atom + def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end + + # Returns atom.upcase + def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end + + # Returns atom?&.upcase + def case_insensitive__atom? + -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS) + end + + # TODO: handle astring_chars entirely inside the lexer + def astring_chars + combine_adjacent(*ASTRING_CHARS_TOKENS) + end + + # astring = 1*ASTRING-CHAR / string + def astring + lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string + end + + def astring? + lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string? + end # Use #label or #label_in to assert specific known labels # (+tagged-ext-label+ only, not +atom+). @@ -264,6 +304,15 @@ def label(word) parse_error("unexpected atom %p, expected %p instead", val, word) end + # nstring = string / nil + def nstring + NIL? ? nil : string + end + + def nquoted + NIL? ? nil : quoted + end + def response token = lookahead case token.symbol @@ -1198,65 +1247,56 @@ def id_response end end + # namespace-response = "NAMESPACE" SP namespace + # SP namespace SP namespace + # ; The first Namespace is the Personal Namespace(s). + # ; The second Namespace is the Other Users' + # ; Namespace(s). + # ; The third Namespace is the Shared Namespace(s). 
def namespace_response + name = label("NAMESPACE") @lex_state = EXPR_DATA - token = lookahead - token = match(T_ATOM) - name = token.value.upcase - match(T_SPACE) - personal = namespaces - match(T_SPACE) - other = namespaces - match(T_SPACE) - shared = namespaces + data = Namespaces.new((SP!; namespace), + (SP!; namespace), + (SP!; namespace)) + UntaggedResponse.new(name, data, @str) + ensure @lex_state = EXPR_BEG - data = Namespaces.new(personal, other, shared) - return UntaggedResponse.new(name, data, @str) - end - - def namespaces - token = lookahead - # empty () is not allowed, so nil is functionally identical to empty. - data = [] - if token.symbol == T_NIL - shift_token - else - match(T_LPAR) - loop do - data << namespace - break unless lookahead.symbol == T_SPACE - shift_token - end - match(T_RPAR) - end - data end + # namespace = nil / "(" 1*namespace-descr ")" def namespace - match(T_LPAR) - prefix = match(T_QUOTED, T_LITERAL).value - match(T_SPACE) - delimiter = string + NIL? and return [] + lpar + list = [namespace_descr] + list << namespace_descr until rpar? + list + end + + # namespace-descr = "(" string SP + # (DQUOTE QUOTED-CHAR DQUOTE / nil) + # [namespace-response-extensions] ")" + def namespace_descr + lpar + prefix = string; SP! + delimiter = nquoted # n.b: should only accept single char extensions = namespace_response_extensions - match(T_RPAR) + rpar Namespace.new(prefix, delimiter, extensions) end + # namespace-response-extensions = *namespace-response-extension + # namespace-response-extension = SP string SP + # "(" string *(SP string) ")" def namespace_response_extensions data = {} - token = lookahead - if token.symbol == T_SPACE - shift_token - name = match(T_QUOTED, T_LITERAL).value + while SP? + name = string; SP! 
+ lpar data[name] ||= [] - match(T_SPACE) - match(T_LPAR) - loop do - data[name].push match(T_QUOTED, T_LITERAL).value - break unless lookahead.symbol == T_SPACE - shift_token - end - match(T_RPAR) + data[name] << string + data[name] << string while SP? + rpar end data end @@ -1459,80 +1499,6 @@ def flag_list end end - def nstring - token = lookahead - if token.symbol == T_NIL - shift_token - return nil - else - return string - end - end - - def astring - token = lookahead - if string_token?(token) - return string - else - return astring_chars - end - end - - def string - token = lookahead - if token.symbol == T_NIL - shift_token - return nil - end - token = match(T_QUOTED, T_LITERAL) - return token.value - end - - STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL] - - def string_token?(token) - return STRING_TOKENS.include?(token.symbol) - end - - def case_insensitive_string - token = lookahead - if token.symbol == T_NIL - shift_token - return nil - end - token = match(T_QUOTED, T_LITERAL) - return token.value.upcase - end - - # atom = 1*ATOM-CHAR - # ATOM-CHAR = <any CHAR except atom-specials> - ATOM_TOKENS = [ - T_ATOM, - T_NUMBER, - T_NIL, - T_LBRA, - T_PLUS - ] - - # ASTRING-CHAR = ATOM-CHAR / resp-specials - # resp-specials = "]" - ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA] - - def astring_chars - combine_adjacent(*ASTRING_CHARS_TOKENS) - end - - def combine_adjacent(*tokens) - result = "".b - while token = accept(*tokens) - result << token.value - end - if result.empty? - parse_error('unexpected token %s (expected %s)', - lookahead.symbol, args.join(" or ")) - end - result - end # See https://www.rfc-editor.org/errata/rfc3501 # diff --git a/lib/net/imap/response_parser/parser_utils.rb b/lib/net/imap/response_parser/parser_utils.rb index 1583c895..38d2bbb1 100644 --- a/lib/net/imap/response_parser/parser_utils.rb +++ b/lib/net/imap/response_parser/parser_utils.rb @@ -47,10 +47,68 @@ def #{match_name} RUBY end + # TODO: move coercion to the token.value method? 
+ def def_token_matchers(name, *token_symbols, coerce: nil, send: nil) + match_name = name.match(/\A[A-Z]/) ? "#{name}!" : name + + if token_symbols.size == 1 + token = token_symbols.first + matcher = "token&.symbol == %p" % [token] + desc = token + else + matcher = "%p.include? token&.symbol" % [token_symbols] + desc = token_symbols.join(" or ") + end + + value = "(token.value)" + value = coerce.to_s + value if coerce + value = [value, send].join(".") if send + + raise_parse_error = <<~RUBY + parse_error("unexpected %s (expected #{desc})", token&.symbol) + RUBY + + class_eval <<~RUBY, __FILE__, __LINE__ + 1 + # frozen_string_literal: true + + def #{name}? + token = #{LOOKAHEAD} + if #{matcher} + #{SHIFT_TOKEN} + #{value} + end + end + + def #{match_name} + token = #{LOOKAHEAD} + if #{matcher} + #{SHIFT_TOKEN} + #{value} + else + #{raise_parse_error} + end + end + RUBY + end + end private + # TODO: after checking the lookahead, use a regexp for remaining chars. + # That way a loop isn't needed. + def combine_adjacent(*tokens) + result = "".b + while token = accept(*tokens) + result << token.value + end + if result.empty? + parse_error('unexpected token %s (expected %s)', + lookahead.symbol, tokens.join(" or ")) + end + result + end + def match(*args) token = lookahead unless args.include?(token.symbol) diff --git a/test/net/imap/fixtures/response_parser/namespace_responses.yml b/test/net/imap/fixtures/response_parser/namespace_responses.yml index a6db24af..2f0514d3 100644 --- a/test/net/imap/fixtures/response_parser/namespace_responses.yml +++ b/test/net/imap/fixtures/response_parser/namespace_responses.yml @@ -49,9 +49,7 @@ raw_data: *rfc2342_ex5_3 NAMESPACE_rfc2342_example_5.4: - # WARNING: this example is wrong and will be fixed soon... 
- :response: &rfc2342_ex5_4 "* NAMESPACE ((\"\" \"/\")) ((\"~\" \"/\")) ((\"#shared/\" \"/\") (\"#public/\" - \"/\") (\"#ftp/\" \"/\") (\"#news.\" \".\"))\r\n" + :response: &rfc2342_ex5_4 "* NAMESPACE ((\"\" \"/\")) ((\"~\" \"/\")) ((\"#shared/\" \"/\")(\"#public/\" \"/\")(\"#ftp/\" \"/\")(\"#news.\" \".\"))\r\n" :expected: !ruby/struct:Net::IMAP::UntaggedResponse name: NAMESPACE data: !ruby/struct:Net::IMAP::Namespaces @@ -100,7 +98,7 @@ raw_data: *rfc2342_ex5_5 NAMESPACE_rfc2342_example_5.6: - :response: &rfc2342_ex5_6 "* NAMESPACE ((\"\" \"/\") (\"#mh/\" \"/\" \"X-PARAM\" (\"FLAG1\" \"FLAG2\"))) + :response: &rfc2342_ex5_6 "* NAMESPACE ((\"\" \"/\")(\"#mh/\" \"/\" \"X-PARAM\" (\"FLAG1\" \"FLAG2\"))) NIL NIL\r\n" :expected: !ruby/struct:Net::IMAP::UntaggedResponse name: NAMESPACE