Skip to content

Commit 5f08055

Browse files
committed
🐛⚡️♻️ NAMESPACE: fix parsing (not SP-delimited!)
I misread or misunderstood the spec when I first implemented this... I wrongly inserted SP-delimiters. Most servers don't list more than one namespace, so probably very few noticed the bug! Also: * ♻️ Rewrote using "new parser style" to more directly imitate the ABNF. * ⚡️ Small but measurable performance improvement. * ♻️ Add ParserUtils::Generator#def_token_matchers for quoted, string, nil, tagged_ext_label, etc. * ♻️ Move atom, astring, nstring, etc to top, so they can be aliased. * ♻️ Use NIL in nstring, nquoted
1 parent f30690e commit 5f08055

File tree

3 files changed

+160
-138
lines changed

3 files changed

+160
-138
lines changed

lib/net/imap/response_parser.rb

Lines changed: 100 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -222,24 +222,25 @@ def unescape_quoted(quoted)
222222

223223
def_char_matchers :SP, " ", :T_SPACE
224224

225+
def_char_matchers :lpar, "(", :T_LPAR
226+
def_char_matchers :rpar, ")", :T_RPAR
227+
225228
def_char_matchers :lbra, "[", :T_LBRA
226229
def_char_matchers :rbra, "]", :T_RBRA
227230

228-
# atom = 1*ATOM-CHAR
229-
#
230-
# TODO: match atom entirely by regexp (in the "lexer")
231-
def atom; -combine_adjacent(*ATOM_TOKENS) end
231+
def_token_matchers :quoted, T_QUOTED
232232

233-
# the #accept version of #atom
234-
def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
233+
# string = quoted / literal
234+
def_token_matchers :string, T_QUOTED, T_LITERAL
235235

236-
# Returns <tt>atom.upcase</tt>
237-
def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
236+
# use where string represents "LABEL" values
237+
def_token_matchers :case_insensitive_string,
238+
T_QUOTED, T_LITERAL,
239+
send: :upcase
238240

239-
# Returns <tt>atom?&.upcase</tt>
240-
def case_insensitive__atom?
241-
-combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
242-
end
241+
# n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
242+
# NIL? returns nil when it does *not* match
243+
def_token_matchers :NIL, T_NIL
243244

244245
# In addition to explicitly uses of +tagged-ext-label+, use this to match
245246
# keywords when the grammar has not provided any extension syntax.
@@ -254,8 +255,47 @@ def case_insensitive__atom?
254255
# tagged-label-char = tagged-label-fchar / DIGIT / ":"
255256
#
256257
# TODO: add to lexer and only match tagged-ext-label
257-
alias tagged_ext_label case_insensitive__atom
258-
alias tagged_ext_label? case_insensitive__atom?
258+
def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
259+
260+
# atom = 1*ATOM-CHAR
261+
# ATOM-CHAR = <any CHAR except atom-specials>
262+
ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
263+
264+
# ASTRING-CHAR = ATOM-CHAR / resp-specials
265+
# resp-specials = "]"
266+
ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
267+
268+
ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
269+
270+
# atom = 1*ATOM-CHAR
271+
#
272+
# TODO: match atom entirely by regexp (in the "lexer")
273+
def atom; -combine_adjacent(*ATOM_TOKENS) end
274+
275+
# the #accept version of #atom
276+
def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
277+
278+
# Returns <tt>atom.upcase</tt>
279+
def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
280+
281+
# Returns <tt>atom?&.upcase</tt>
282+
def case_insensitive__atom?
283+
-combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
284+
end
285+
286+
# TODO: handle astring_chars entirely inside the lexer
287+
def astring_chars
288+
combine_adjacent(*ASTRING_CHARS_TOKENS)
289+
end
290+
291+
# astring = 1*ASTRING-CHAR / string
292+
def astring
293+
lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
294+
end
295+
296+
def astring?
297+
lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
298+
end
259299

260300
# Use #label or #label_in to assert specific known labels
261301
# (+tagged-ext-label+ only, not +atom+).
@@ -264,6 +304,15 @@ def label(word)
264304
parse_error("unexpected atom %p, expected %p instead", val, word)
265305
end
266306

307+
# nstring = string / nil
308+
def nstring
309+
NIL? ? nil : string
310+
end
311+
312+
def nquoted
313+
NIL? ? nil : quoted
314+
end
315+
267316
def response
268317
token = lookahead
269318
case token.symbol
@@ -1198,65 +1247,56 @@ def id_response
11981247
end
11991248
end
12001249

1250+
# namespace-response = "NAMESPACE" SP namespace
1251+
# SP namespace SP namespace
1252+
# ; The first Namespace is the Personal Namespace(s).
1253+
# ; The second Namespace is the Other Users'
1254+
# ; Namespace(s).
1255+
# ; The third Namespace is the Shared Namespace(s).
12011256
def namespace_response
1257+
name = label("NAMESPACE")
12021258
@lex_state = EXPR_DATA
1203-
token = lookahead
1204-
token = match(T_ATOM)
1205-
name = token.value.upcase
1206-
match(T_SPACE)
1207-
personal = namespaces
1208-
match(T_SPACE)
1209-
other = namespaces
1210-
match(T_SPACE)
1211-
shared = namespaces
1259+
data = Namespaces.new((SP!; namespace),
1260+
(SP!; namespace),
1261+
(SP!; namespace))
1262+
UntaggedResponse.new(name, data, @str)
1263+
ensure
12121264
@lex_state = EXPR_BEG
1213-
data = Namespaces.new(personal, other, shared)
1214-
return UntaggedResponse.new(name, data, @str)
1215-
end
1216-
1217-
def namespaces
1218-
token = lookahead
1219-
# empty () is not allowed, so nil is functionally identical to empty.
1220-
data = []
1221-
if token.symbol == T_NIL
1222-
shift_token
1223-
else
1224-
match(T_LPAR)
1225-
loop do
1226-
data << namespace
1227-
break unless lookahead.symbol == T_SPACE
1228-
shift_token
1229-
end
1230-
match(T_RPAR)
1231-
end
1232-
data
12331265
end
12341266

1267+
# namespace = nil / "(" 1*namespace-descr ")"
12351268
def namespace
1236-
match(T_LPAR)
1237-
prefix = match(T_QUOTED, T_LITERAL).value
1238-
match(T_SPACE)
1239-
delimiter = string
1269+
NIL? and return []
1270+
lpar
1271+
list = [namespace_descr]
1272+
list << namespace_descr until rpar?
1273+
list
1274+
end
1275+
1276+
# namespace-descr = "(" string SP
1277+
# (DQUOTE QUOTED-CHAR DQUOTE / nil)
1278+
# [namespace-response-extensions] ")"
1279+
def namespace_descr
1280+
lpar
1281+
prefix = string; SP!
1282+
delimiter = nquoted # n.b: should only accept single char
12401283
extensions = namespace_response_extensions
1241-
match(T_RPAR)
1284+
rpar
12421285
Namespace.new(prefix, delimiter, extensions)
12431286
end
12441287

1288+
# namespace-response-extensions = *namespace-response-extension
1289+
# namespace-response-extension = SP string SP
1290+
# "(" string *(SP string) ")"
12451291
def namespace_response_extensions
12461292
data = {}
1247-
token = lookahead
1248-
if token.symbol == T_SPACE
1249-
shift_token
1250-
name = match(T_QUOTED, T_LITERAL).value
1293+
while SP?
1294+
name = string; SP!
1295+
lpar
12511296
data[name] ||= []
1252-
match(T_SPACE)
1253-
match(T_LPAR)
1254-
loop do
1255-
data[name].push match(T_QUOTED, T_LITERAL).value
1256-
break unless lookahead.symbol == T_SPACE
1257-
shift_token
1258-
end
1259-
match(T_RPAR)
1297+
data[name] << string
1298+
data[name] << string while SP?
1299+
rpar
12601300
end
12611301
data
12621302
end
@@ -1459,80 +1499,6 @@ def flag_list
14591499
end
14601500
end
14611501

1462-
def nstring
1463-
token = lookahead
1464-
if token.symbol == T_NIL
1465-
shift_token
1466-
return nil
1467-
else
1468-
return string
1469-
end
1470-
end
1471-
1472-
def astring
1473-
token = lookahead
1474-
if string_token?(token)
1475-
return string
1476-
else
1477-
return astring_chars
1478-
end
1479-
end
1480-
1481-
def string
1482-
token = lookahead
1483-
if token.symbol == T_NIL
1484-
shift_token
1485-
return nil
1486-
end
1487-
token = match(T_QUOTED, T_LITERAL)
1488-
return token.value
1489-
end
1490-
1491-
STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1492-
1493-
def string_token?(token)
1494-
return STRING_TOKENS.include?(token.symbol)
1495-
end
1496-
1497-
def case_insensitive_string
1498-
token = lookahead
1499-
if token.symbol == T_NIL
1500-
shift_token
1501-
return nil
1502-
end
1503-
token = match(T_QUOTED, T_LITERAL)
1504-
return token.value.upcase
1505-
end
1506-
1507-
# atom = 1*ATOM-CHAR
1508-
# ATOM-CHAR = <any CHAR except atom-specials>
1509-
ATOM_TOKENS = [
1510-
T_ATOM,
1511-
T_NUMBER,
1512-
T_NIL,
1513-
T_LBRA,
1514-
T_PLUS
1515-
]
1516-
1517-
# ASTRING-CHAR = ATOM-CHAR / resp-specials
1518-
# resp-specials = "]"
1519-
ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1520-
1521-
def astring_chars
1522-
combine_adjacent(*ASTRING_CHARS_TOKENS)
1523-
end
1524-
1525-
def combine_adjacent(*tokens)
1526-
result = "".b
1527-
while token = accept(*tokens)
1528-
result << token.value
1529-
end
1530-
if result.empty?
1531-
parse_error('unexpected token %s (expected %s)',
1532-
lookahead.symbol, args.join(" or "))
1533-
end
1534-
result
1535-
end
15361502

15371503
# See https://www.rfc-editor.org/errata/rfc3501
15381504
#

lib/net/imap/response_parser/parser_utils.rb

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,68 @@ def #{match_name}
4747
RUBY
4848
end
4949

50+
# TODO: move coersion to the token.value method?
51+
def def_token_matchers(name, *token_symbols, coerce: nil, send: nil)
52+
match_name = name.match(/\A[A-Z]/) ? "#{name}!" : name
53+
54+
if token_symbols.size == 1
55+
token = token_symbols.first
56+
matcher = "token&.symbol == %p" % [token]
57+
desc = token
58+
else
59+
matcher = "%p.include? token&.symbol" % [token_symbols]
60+
desc = token_symbols.join(" or ")
61+
end
62+
63+
value = "(token.value)"
64+
value = coerce.to_s + value if coerce
65+
value = [value, send].join(".") if send
66+
67+
raise_parse_error = <<~RUBY
68+
parse_error("unexpected %s (expected #{desc})", token&.symbol)
69+
RUBY
70+
71+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
72+
# frozen_string_literal: true
73+
74+
def #{name}?
75+
token = #{LOOKAHEAD}
76+
if #{matcher}
77+
#{SHIFT_TOKEN}
78+
#{value}
79+
end
80+
end
81+
82+
def #{match_name}
83+
token = #{LOOKAHEAD}
84+
if #{matcher}
85+
#{SHIFT_TOKEN}
86+
#{value}
87+
else
88+
#{raise_parse_error}
89+
end
90+
end
91+
RUBY
92+
end
93+
5094
end
5195

5296
private
5397

98+
# TODO: after checking the lookahead, use a regexp for remaining chars.
99+
# That way a loop isn't needed.
100+
def combine_adjacent(*tokens)
101+
result = "".b
102+
while token = accept(*tokens)
103+
result << token.value
104+
end
105+
if result.empty?
106+
parse_error('unexpected token %s (expected %s)',
107+
lookahead.symbol, tokens.join(" or "))
108+
end
109+
result
110+
end
111+
54112
def match(*args)
55113
token = lookahead
56114
unless args.include?(token.symbol)

test/net/imap/fixtures/response_parser/namespace_responses.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,7 @@
4949
raw_data: *rfc2342_ex5_3
5050

5151
NAMESPACE_rfc2342_example_5.4:
52-
# WARNING: this example is wrong and will be fixed soon...
53-
:response: &rfc2342_ex5_4 "* NAMESPACE ((\"\" \"/\")) ((\"~\" \"/\")) ((\"#shared/\" \"/\") (\"#public/\"
54-
\"/\") (\"#ftp/\" \"/\") (\"#news.\" \".\"))\r\n"
52+
:response: &rfc2342_ex5_4 "* NAMESPACE ((\"\" \"/\")) ((\"~\" \"/\")) ((\"#shared/\" \"/\")(\"#public/\" \"/\")(\"#ftp/\" \"/\")(\"#news.\" \".\"))\r\n"
5553
:expected: !ruby/struct:Net::IMAP::UntaggedResponse
5654
name: NAMESPACE
5755
data: !ruby/struct:Net::IMAP::Namespaces
@@ -100,7 +98,7 @@
10098
raw_data: *rfc2342_ex5_5
10199

102100
NAMESPACE_rfc2342_example_5.6:
103-
:response: &rfc2342_ex5_6 "* NAMESPACE ((\"\" \"/\") (\"#mh/\" \"/\" \"X-PARAM\" (\"FLAG1\" \"FLAG2\")))
101+
:response: &rfc2342_ex5_6 "* NAMESPACE ((\"\" \"/\")(\"#mh/\" \"/\" \"X-PARAM\" (\"FLAG1\" \"FLAG2\")))
104102
NIL NIL\r\n"
105103
:expected: !ruby/struct:Net::IMAP::UntaggedResponse
106104
name: NAMESPACE

0 commit comments

Comments
 (0)