Skip to content

🐛 Fix NAMESPACE parsing (and other ♻️ refactoring) #112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
234 changes: 100 additions & 134 deletions lib/net/imap/response_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -222,24 +222,25 @@ def unescape_quoted(quoted)

def_char_matchers :SP, " ", :T_SPACE

def_char_matchers :lpar, "(", :T_LPAR
def_char_matchers :rpar, ")", :T_RPAR

def_char_matchers :lbra, "[", :T_LBRA
def_char_matchers :rbra, "]", :T_RBRA

# atom = 1*ATOM-CHAR
#
# Joins the adjacent ATOM_TOKENS into a single string and returns it
# deduplicated (and therefore frozen) via String#-@.
#
# TODO: match atom entirely by regexp (in the "lexer")
def atom
  joined = combine_adjacent(*ATOM_TOKENS)
  -joined
end
def_token_matchers :quoted, T_QUOTED

# The #accept version of #atom: yields +nil+ when the lookahead is not one
# of ATOM_TOKENS, otherwise the frozen, combined atom string.
def atom?
  return unless lookahead?(*ATOM_TOKENS)

  -combine_adjacent(*ATOM_TOKENS)
end
# string = quoted / literal
def_token_matchers :string, T_QUOTED, T_LITERAL

# Returns <tt>atom.upcase</tt>: the combined ATOM_TOKENS text, upcased and
# then deduplicated (frozen) via String#-@.
def case_insensitive__atom
  upcased = combine_adjacent(*ATOM_TOKENS).upcase
  -upcased
end
# use where string represents "LABEL" values
def_token_matchers :case_insensitive_string,
T_QUOTED, T_LITERAL,
send: :upcase

# Returns <tt>atom?&.upcase</tt>: +nil+ when the lookahead is not one of
# ATOM_TOKENS, otherwise the upcased, frozen atom string.
def case_insensitive__atom?
  return unless lookahead?(*ATOM_TOKENS)

  upcased = combine_adjacent(*ATOM_TOKENS).upcase
  -upcased
end
# n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
# NIL? returns nil when it does *not* match
def_token_matchers :NIL, T_NIL

# In addition to explicit uses of +tagged-ext-label+, use this to match
# keywords when the grammar has not provided any extension syntax.
Expand All @@ -254,8 +255,47 @@ def case_insensitive__atom?
# tagged-label-char = tagged-label-fchar / DIGIT / ":"
#
# TODO: add to lexer and only match tagged-ext-label
alias tagged_ext_label case_insensitive__atom
alias tagged_ext_label? case_insensitive__atom?
def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase

# atom = 1*ATOM-CHAR
# ATOM-CHAR = <any CHAR except atom-specials>
#
# Token symbols that may be combined to form an atom.  Frozen so that the
# shared token list cannot be mutated by accident.
ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS].freeze

# ASTRING-CHAR = ATOM-CHAR / resp-specials
# resp-specials = "]"
ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze

ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze

# atom = 1*ATOM-CHAR
#
# Joins the adjacent ATOM_TOKENS into a single string and returns it
# deduplicated (and therefore frozen) via String#-@.
#
# TODO: match atom entirely by regexp (in the "lexer")
def atom
  joined = combine_adjacent(*ATOM_TOKENS)
  -joined
end

# The #accept version of #atom: yields +nil+ when the lookahead is not one
# of ATOM_TOKENS, otherwise the frozen, combined atom string.
def atom?
  return unless lookahead?(*ATOM_TOKENS)

  -combine_adjacent(*ATOM_TOKENS)
end

# Returns <tt>atom.upcase</tt>: the combined ATOM_TOKENS text, upcased and
# then deduplicated (frozen) via String#-@.
def case_insensitive__atom
  upcased = combine_adjacent(*ATOM_TOKENS).upcase
  -upcased
end

# Returns <tt>atom?&.upcase</tt>: +nil+ when the lookahead is not one of
# ATOM_TOKENS, otherwise the upcased, frozen atom string.
def case_insensitive__atom?
  return unless lookahead?(*ATOM_TOKENS)

  upcased = combine_adjacent(*ATOM_TOKENS).upcase
  -upcased
end

# Combines the adjacent ASTRING_CHARS_TOKENS into one string.
#
# TODO: handle astring_chars entirely inside the lexer
def astring_chars
  token_symbols = ASTRING_CHARS_TOKENS
  combine_adjacent(*token_symbols)
end

# astring = 1*ASTRING-CHAR / string
#
# Dispatches on the lookahead: ASTRING-CHAR tokens are combined by
# #astring_chars, anything else is handed to #string.
def astring
  if lookahead?(*ASTRING_CHARS_TOKENS)
    astring_chars
  else
    string
  end
end

# Optional form of #astring: ASTRING-CHAR tokens are combined by
# #astring_chars, anything else is handed to #string?.
def astring?
  if lookahead?(*ASTRING_CHARS_TOKENS)
    astring_chars
  else
    string?
  end
end

# Use #label or #label_in to assert specific known labels
# (+tagged-ext-label+ only, not +atom+).
Expand All @@ -264,6 +304,15 @@ def label(word)
parse_error("unexpected atom %p, expected %p instead", val, word)
end

# nstring = string / nil
#
# Returns +nil+ when NIL? matches, otherwise the result of #string.
def nstring
  return nil if NIL?

  string
end

# Returns +nil+ when NIL? matches, otherwise the result of #quoted.
def nquoted
  return nil if NIL?

  quoted
end

def response
token = lookahead
case token.symbol
Expand Down Expand Up @@ -1198,65 +1247,56 @@ def id_response
end
end

# namespace-response = "NAMESPACE" SP namespace
# SP namespace SP namespace
# ; The first Namespace is the Personal Namespace(s).
# ; The second Namespace is the Other Users'
# ; Namespace(s).
# ; The third Namespace is the Shared Namespace(s).
def namespace_response
name = label("NAMESPACE")
@lex_state = EXPR_DATA
token = lookahead
token = match(T_ATOM)
name = token.value.upcase
match(T_SPACE)
personal = namespaces
match(T_SPACE)
other = namespaces
match(T_SPACE)
shared = namespaces
data = Namespaces.new((SP!; namespace),
(SP!; namespace),
(SP!; namespace))
UntaggedResponse.new(name, data, @str)
ensure
@lex_state = EXPR_BEG
data = Namespaces.new(personal, other, shared)
return UntaggedResponse.new(name, data, @str)
end

def namespaces
token = lookahead
# empty () is not allowed, so nil is functionally identical to empty.
data = []
if token.symbol == T_NIL
shift_token
else
match(T_LPAR)
loop do
data << namespace
break unless lookahead.symbol == T_SPACE
shift_token
end
match(T_RPAR)
end
data
end

# namespace = nil / "(" 1*namespace-descr ")"
def namespace
match(T_LPAR)
prefix = match(T_QUOTED, T_LITERAL).value
match(T_SPACE)
delimiter = string
NIL? and return []
lpar
list = [namespace_descr]
list << namespace_descr until rpar?
list
end

# namespace-descr = "(" string SP
# (DQUOTE QUOTED-CHAR DQUOTE / nil)
# [namespace-response-extensions] ")"
def namespace_descr
lpar
prefix = string; SP!
delimiter = nquoted # n.b: should only accept single char
extensions = namespace_response_extensions
match(T_RPAR)
rpar
Namespace.new(prefix, delimiter, extensions)
end

# namespace-response-extensions = *namespace-response-extension
# namespace-response-extension = SP string SP
# "(" string *(SP string) ")"
def namespace_response_extensions
data = {}
token = lookahead
if token.symbol == T_SPACE
shift_token
name = match(T_QUOTED, T_LITERAL).value
while SP?
name = string; SP!
lpar
data[name] ||= []
match(T_SPACE)
match(T_LPAR)
loop do
data[name].push match(T_QUOTED, T_LITERAL).value
break unless lookahead.symbol == T_SPACE
shift_token
end
match(T_RPAR)
data[name] << string
data[name] << string while SP?
rpar
end
data
end
Expand Down Expand Up @@ -1459,80 +1499,6 @@ def flag_list
end
end

# nstring = string / nil
#
# Consumes a T_NIL lookahead and returns +nil+; any other lookahead is
# delegated to #string.
def nstring
  return string unless lookahead.symbol == T_NIL

  shift_token
  nil
end

# Parses an astring: when the lookahead satisfies #string_token? it is
# parsed with #string, otherwise with #astring_chars.
def astring
  string_token?(lookahead) ? string : astring_chars
end

# Parses a string value.  A T_NIL lookahead is consumed and yields +nil+;
# otherwise a T_QUOTED or T_LITERAL token is matched and its value returned.
def string
  if lookahead.symbol == T_NIL
    shift_token
    nil
  else
    match(T_QUOTED, T_LITERAL).value
  end
end

# Token symbols treated as string-like tokens.  Frozen so that the shared
# token list cannot be mutated by accident.
STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL].freeze

# True when the given token's symbol is one of STRING_TOKENS.
def string_token?(token)
  symbol = token.symbol
  STRING_TOKENS.include?(symbol)
end

# Like a string parse, but upcases the result.  A T_NIL lookahead is
# consumed and yields +nil+; otherwise a T_QUOTED or T_LITERAL token is
# matched and its upcased value returned.
def case_insensitive_string
  if lookahead.symbol == T_NIL
    shift_token
    nil
  else
    match(T_QUOTED, T_LITERAL).value.upcase
  end
end

# atom = 1*ATOM-CHAR
# ATOM-CHAR = <any CHAR except atom-specials>
#
# Token symbols that may be combined to form an atom.  Frozen so that the
# shared token list cannot be mutated by accident.
ATOM_TOKENS = [
  T_ATOM,
  T_NUMBER,
  T_NIL,
  T_LBRA,
  T_PLUS
].freeze

# ASTRING-CHAR = ATOM-CHAR / resp-specials
# resp-specials = "]"
#
# ATOM_TOKENS plus T_RBRA.  Frozen so that the shared token list cannot be
# mutated by accident.
ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze

# Combines the adjacent ASTRING_CHARS_TOKENS into one string.
def astring_chars
  token_symbols = ASTRING_CHARS_TOKENS
  combine_adjacent(*token_symbols)
end

# Accepts consecutive tokens whose symbol is one of +tokens+ and
# concatenates their values into a single binary string.
#
# tokens:: the token symbols that may be combined.
#
# Returns the concatenated string.  When not even one token was accepted,
# reports a parse error naming the lookahead symbol and the expected ones.
def combine_adjacent(*tokens)
  result = "".b
  while (token = accept(*tokens))
    result << token.value
  end
  if result.empty?
    # The expected symbols are the +tokens+ parameter; the previous code
    # referenced an undefined +args+ and raised NameError instead of the
    # intended parse error.
    parse_error('unexpected token %s (expected %s)',
                lookahead.symbol, tokens.join(" or "))
  end
  result
end

# See https://www.rfc-editor.org/errata/rfc3501
#
Expand Down
58 changes: 58 additions & 0 deletions lib/net/imap/response_parser/parser_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,68 @@ def #{match_name}
RUBY
end

# TODO: move coercion to the token.value method?
# Generates a pair of token-matching parser methods by building their source
# as a string and passing it to +class_eval+.
#
# name::          base name of the generated methods.
# token_symbols:: one or more token symbols the lookahead token's +symbol+
#                 is compared against.
# coerce::        optional; its +to_s+ is prefixed to <tt>(token.value)</tt>
#                 in the generated code, so it is applied as a call on the
#                 token's value.
# send::          optional; appended to the value expression as a method
#                 call (e.g. <tt>send: :upcase</tt>).
#
# Generated methods:
# * <tt>name?</tt> -- when the lookahead matches, runs SHIFT_TOKEN and
#   returns the value expression; otherwise the +if+ falls through and the
#   method returns +nil+.
# * <tt>name</tt> (or <tt>name!</tt> when +name+ begins with an uppercase
#   letter) -- same on a match, but calls +parse_error+ otherwise.
#
# NOTE(review): LOOKAHEAD and SHIFT_TOKEN are defined outside this excerpt;
# presumably they are source snippets that peek at and consume the current
# token -- confirm against their definitions.
def def_token_matchers(name, *token_symbols, coerce: nil, send: nil)
# Uppercase names (e.g. :NIL) get a "!"-suffixed raising matcher.
match_name = name.match(/\A[A-Z]/) ? "#{name}!" : name

# Build the match condition and the description used in the error message.
# A single symbol is compared with ==; several use Array#include?.
if token_symbols.size == 1
token = token_symbols.first
matcher = "token&.symbol == %p" % [token]
desc = token
else
matcher = "%p.include? token&.symbol" % [token_symbols]
desc = token_symbols.join(" or ")
end

# Build the expression that the generated methods return on a match.
value = "(token.value)"
value = coerce.to_s + value if coerce
value = [value, send].join(".") if send

# Source for the failure branch of the raising matcher.
raise_parse_error = <<~RUBY
parse_error("unexpected %s (expected #{desc})", token&.symbol)
RUBY

# __FILE__ and __LINE__ + 1 are passed so the evaluated code is attributed
# to this file, starting at the first line of the heredoc.
class_eval <<~RUBY, __FILE__, __LINE__ + 1
# frozen_string_literal: true

def #{name}?
token = #{LOOKAHEAD}
if #{matcher}
#{SHIFT_TOKEN}
#{value}
end
end

def #{match_name}
token = #{LOOKAHEAD}
if #{matcher}
#{SHIFT_TOKEN}
#{value}
else
#{raise_parse_error}
end
end
RUBY
end

end

private

# Accepts consecutive tokens whose symbol is one of +tokens+ and joins their
# values into a single binary string.  When nothing was accepted, a parse
# error naming the lookahead symbol and the expected symbols is reported.
#
# TODO: after checking the lookahead, use a regexp for remaining chars.
# That way a loop isn't needed.
def combine_adjacent(*tokens)
  combined = String.new(encoding: Encoding::BINARY)
  while (piece = accept(*tokens))
    combined << piece.value
  end
  unless combined.length.positive?
    parse_error('unexpected token %s (expected %s)',
                lookahead.symbol, tokens.join(" or "))
  end
  combined
end

def match(*args)
token = lookahead
unless args.include?(token.symbol)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@
raw_data: *rfc2342_ex5_3

NAMESPACE_rfc2342_example_5.4:
# WARNING: this example is wrong and will be fixed soon...
:response: &rfc2342_ex5_4 "* NAMESPACE ((\"\" \"/\")) ((\"~\" \"/\")) ((\"#shared/\" \"/\") (\"#public/\"
\"/\") (\"#ftp/\" \"/\") (\"#news.\" \".\"))\r\n"
:response: &rfc2342_ex5_4 "* NAMESPACE ((\"\" \"/\")) ((\"~\" \"/\")) ((\"#shared/\" \"/\")(\"#public/\" \"/\")(\"#ftp/\" \"/\")(\"#news.\" \".\"))\r\n"
:expected: !ruby/struct:Net::IMAP::UntaggedResponse
name: NAMESPACE
data: !ruby/struct:Net::IMAP::Namespaces
Expand Down Expand Up @@ -100,7 +98,7 @@
raw_data: *rfc2342_ex5_5

NAMESPACE_rfc2342_example_5.6:
:response: &rfc2342_ex5_6 "* NAMESPACE ((\"\" \"/\") (\"#mh/\" \"/\" \"X-PARAM\" (\"FLAG1\" \"FLAG2\")))
:response: &rfc2342_ex5_6 "* NAMESPACE ((\"\" \"/\")(\"#mh/\" \"/\" \"X-PARAM\" (\"FLAG1\" \"FLAG2\")))
NIL NIL\r\n"
:expected: !ruby/struct:Net::IMAP::UntaggedResponse
name: NAMESPACE
Expand Down