diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..fc43c62e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +tests/syntax/fixtures_reference/crlf.ftl eol=crlf +tests/syntax/fixtures_structure/crlf.ftl eol=crlf diff --git a/fluent/syntax/ast.py b/fluent/syntax/ast.py index f2e94806..6d0eff75 100644 --- a/fluent/syntax/ast.py +++ b/fluent/syntax/ast.py @@ -303,11 +303,6 @@ def __init__(self, name, **kwargs): self.name = name -class VariantName(Identifier): - def __init__(self, name, **kwargs): - super(VariantName, self).__init__(name, **kwargs) - - class BaseComment(Entry): def __init__(self, content=None, **kwargs): super(BaseComment, self).__init__(**kwargs) diff --git a/fluent/syntax/ftlstream.py b/fluent/syntax/ftlstream.py deleted file mode 100644 index a6c73357..00000000 --- a/fluent/syntax/ftlstream.py +++ /dev/null @@ -1,295 +0,0 @@ -from __future__ import unicode_literals -from .stream import ParserStream -from .errors import ParseError - - -INLINE_WS = (' ', '\t') -SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*') - - -class FTLParserStream(ParserStream): - last_comment_zero_four_syntax = False - - def skip_inline_ws(self): - while self.ch: - if self.ch not in INLINE_WS: - break - self.next() - - def peek_inline_ws(self): - ch = self.current_peek() - while ch: - if ch not in INLINE_WS: - break - ch = self.peek() - - def skip_blank_lines(self): - line_count = 0 - while True: - self.peek_inline_ws() - - if self.current_peek_is('\n'): - self.skip_to_peek() - self.next() - line_count += 1 - else: - self.reset_peek() - return line_count - - def peek_blank_lines(self): - while True: - line_start = self.get_peek_index() - - self.peek_inline_ws() - - if self.current_peek_is('\n'): - self.peek() - else: - self.reset_peek(line_start) - break - - def skip_indent(self): - self.skip_blank_lines() - self.skip_inline_ws() - - def expect_char(self, ch): - if self.ch == ch: - self.next() - return True - - if ch == '\n': - # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424) - raise ParseError('E0003', '\u2424') - - raise ParseError('E0003', ch) - - def expect_indent(self): - self.expect_char('\n') - self.skip_blank_lines() - self.expect_char(' ') - self.skip_inline_ws() - - def expect_line_end(self): - if self.ch is None: - # EOF is a valid line end in Fluent. - return True - return self.expect_char('\n') - - def take_char(self, f): - ch = self.ch - if ch is not None and f(ch): - self.next() - return ch - return None - - def is_char_id_start(self, ch=None): - if ch is None: - return False - - cc = ord(ch) - return (cc >= 97 and cc <= 122) or \ - (cc >= 65 and cc <= 90) - - def is_identifier_start(self): - ch = self.current_peek() - is_id = self.is_char_id_start(ch) - self.reset_peek() - return is_id - - def is_number_start(self): - ch = self.peek() if self.current_is('-') else self.current() - if ch is None: - return False - - cc = ord(ch) - is_digit = cc >= 48 and cc <= 57 - self.reset_peek() - return is_digit - - def is_char_pattern_continuation(self, ch): - if ch is None: - return False - - return ch not in SPECIAL_LINE_START_CHARS - - def is_peek_value_start(self): - self.peek_inline_ws() - ch = self.current_peek() - - # Inline Patterns may start with any char. - if ch is not None and ch != '\n': - return True - - return self.is_peek_next_line_value() - - def is_peek_next_line_zero_four_style_comment(self): - if not self.current_peek_is('\n'): - return False - - self.peek() - - if self.current_peek_is('/'): - self.peek() - if self.current_peek_is('/'): - self.reset_peek() - return True - - self.reset_peek() - return False - - # -1 - any - # 0 - comment - # 1 - group comment - # 2 - resource comment - def is_peek_next_line_comment(self, level=-1): - if not self.current_peek_is('\n'): - return False - - i = 0 - - while (i <= level or (level == -1 and i < 3)): - self.peek() - if not self.current_peek_is('#'): - if i <= level and level != -1: - self.reset_peek() - return False - break - i += 1 - - self.peek() - - if self.current_peek() in [' ', '\n']: - self.reset_peek() - return True - - self.reset_peek() - return False - - def is_peek_next_line_variant_start(self): - if not self.current_peek_is('\n'): - return False - - self.peek() - - self.peek_blank_lines() - - ptr = self.get_peek_index() - - self.peek_inline_ws() - - if (self.get_peek_index() - ptr == 0): - self.reset_peek() - return False - - if self.current_peek_is('*'): - self.peek() - - if self.current_peek_is('[') and not self.peek_char_is('['): - self.reset_peek() - return True - - self.reset_peek() - return False - - def is_peek_next_line_attribute_start(self): - if not self.current_peek_is('\n'): - return False - - self.peek() - - self.peek_blank_lines() - - ptr = self.get_peek_index() - - self.peek_inline_ws() - - if (self.get_peek_index() - ptr == 0): - self.reset_peek() - return False - - if self.current_peek_is('.'): - self.reset_peek() - return True - - self.reset_peek() - return False - - def is_peek_next_line_value(self): - if not self.current_peek_is('\n'): - return False - - self.peek() - - self.peek_blank_lines() - - ptr = self.get_peek_index() - - self.peek_inline_ws() - - if (self.get_peek_index() - ptr == 0): - self.reset_peek() - return False - - if not self.is_char_pattern_continuation(self.current_peek()): - self.reset_peek() - return False - - self.reset_peek() - return True - - def skip_to_next_entry_start(self): - while self.ch: - if self.current_is('\n') and not self.peek_char_is('\n'): - self.next() - - if self.ch is None or \ - self.is_identifier_start() or \ - self.current_is('-') or \ - self.current_is('#') or \ - (self.current_is('/') and self.peek_char_is('/')) or \ - (self.current_is('[') and self.peek_char_is('[')): - break - self.next() - - def take_id_start(self): - if self.is_char_id_start(self.ch): - ret = self.ch - self.next() - return ret - - raise ParseError('E0004', 'a-zA-Z') - - def take_id_char(self): - def closure(ch): - cc = ord(ch) - return ((cc >= 97 and cc <= 122) or - (cc >= 65 and cc <= 90) or - (cc >= 48 and cc <= 57) or - cc == 95 or cc == 45) - return self.take_char(closure) - - def take_variant_name_char(self): - def closure(ch): - if ch is None: - return False - cc = ord(ch) - return (cc >= 97 and cc <= 122) or \ - (cc >= 65 and cc <= 90) or \ - (cc >= 48 and cc <= 57) or \ - cc == 95 or cc == 45 or cc == 32 - return self.take_char(closure) - - def take_digit(self): - def closure(ch): - cc = ord(ch) - return (cc >= 48 and cc <= 57) - return self.take_char(closure) - - def take_hex_digit(self): - def closure(ch): - cc = ord(ch) - return ( - (cc >= 48 and cc <= 57) # 0-9 - or (cc >= 65 and cc <= 70) # A-F - or (cc >= 97 and cc <= 102)) # a-f - return self.take_char(closure) diff --git a/fluent/syntax/parser.py b/fluent/syntax/parser.py index a658b098..318a6f05 100644 --- a/fluent/syntax/parser.py +++ b/fluent/syntax/parser.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals import re -from .ftlstream import FTLParserStream from . import ast +from .stream import EOF, EOL, FluentParserStream from .errors import ParseError @@ -10,7 +10,7 @@ def decorated(self, ps, *args): if not self.with_spans: return fn(self, ps, *args) - start = ps.get_index() + start = ps.index node = fn(self, ps, *args) # Don't re-add the span if the node already has it. This may happen @@ -18,7 +18,7 @@ def decorated(self, ps, *args): if node.span is not None: return node - end = ps.get_index() + end = ps.index node.add_span(start, end) return node @@ -30,15 +30,15 @@ def __init__(self, with_spans=True): self.with_spans = with_spans def parse(self, source): - ps = FTLParserStream(source) - ps.skip_blank_lines() + ps = FluentParserStream(source) + ps.skip_blank_block() entries = [] last_comment = None - while ps.current(): + while ps.current_char: entry = self.get_entry_or_junk(ps) - blank_lines = ps.skip_blank_lines() + blank_lines = ps.skip_blank_block() # Regular Comments require special logic. Comments may be attached # to Messages or Terms if they are followed immediately by them. @@ -47,7 +47,7 @@ def parse(self, source): # Message or the Term parsed successfully. if ( isinstance(entry, ast.Comment) - and blank_lines == 0 and ps.current() + and blank_lines == 0 and ps.current_char ): # Stash the comment and decide what to do with it # in the next pass. @@ -79,7 +79,7 @@ def parse(self, source): res = ast.Resource(entries) if self.with_spans: - res.add_span(0, ps.get_index()) + res.add_span(0, ps.index) return res @@ -92,32 +92,35 @@ def parse_entry(self, source): Preceding comments are ignored unless they contain syntax errors themselves, in which case Junk for the invalid comment is returned. """ - ps = FTLParserStream(source) - ps.skip_blank_lines() + ps = FluentParserStream(source) + ps.skip_blank_block() - while ps.current_is('#'): + while ps.current_char == '#': skipped = self.get_entry_or_junk(ps) if isinstance(skipped, ast.Junk): # Don't skip Junk comments. return skipped - ps.skip_blank_lines() + ps.skip_blank_block() return self.get_entry_or_junk(ps) def get_entry_or_junk(self, ps): - entry_start_pos = ps.get_index() + entry_start_pos = ps.index try: entry = self.get_entry(ps) ps.expect_line_end() return entry except ParseError as err: - error_index = ps.get_index() - ps.skip_to_next_entry_start() - next_entry_start = ps.get_index() + error_index = ps.index + ps.skip_to_next_entry_start(entry_start_pos) + next_entry_start = ps.index + if next_entry_start < error_index: + # The position of the error must be inside of the Junk's span. + error_index = next_entry_start # Create a Junk instance - slice = ps.get_slice(entry_start_pos, next_entry_start) + slice = ps.string[entry_start_pos:next_entry_start] junk = ast.Junk(slice) if self.with_spans: junk.add_span(entry_start_pos, next_entry_start) @@ -127,16 +130,16 @@ def get_entry_or_junk(self, ps): return junk def get_entry(self, ps): - if ps.current_is('#'): + if ps.current_char == '#': return self.get_comment(ps) - if ps.current_is('/'): + if ps.current_char == '/': return self.get_zero_four_style_comment(ps) - if ps.current_is('['): + if ps.current_char == '[': return self.get_group_comment_from_section(ps) - if ps.current_is('-'): + if ps.current_char == '-': return self.get_term(ps) if ps.is_identifier_start(): @@ -153,13 +156,13 @@ def get_zero_four_style_comment(self, ps): content = '' while True: - ch = ps.take_char(lambda x: x != '\n') + ch = ps.take_char(lambda x: x != EOL) while ch: content += ch - ch = ps.take_char(lambda x: x != '\n') + ch = ps.take_char(lambda x: x != EOL) - if ps.is_peek_next_line_zero_four_style_comment(): - content += ps.current() + if ps.is_next_line_zero_four_comment(skip=False): + content += ps.current_char ps.next() ps.expect_char('/') ps.expect_char('/') @@ -168,8 +171,7 @@ def get_zero_four_style_comment(self, ps): break # Comments followed by Sections become GroupComments. - ps.peek() - if ps.current_peek_is('['): + if ps.peek() == '[': ps.skip_to_peek() self.get_group_comment_from_section(ps) return ast.GroupComment(content) @@ -188,22 +190,23 @@ def get_comment(self, ps): while True: i = -1 - while ps.current_is('#') and (i < (2 if level == -1 else level)): + while ps.current_char == '#' \ + and (i < (2 if level == -1 else level)): ps.next() i += 1 if level == -1: level = i - if not ps.current_is('\n'): + if ps.current_char != EOL: ps.expect_char(' ') - ch = ps.take_char(lambda x: x != '\n') + ch = ps.take_char(lambda x: x != EOL) while ch: content += ch - ch = ps.take_char(lambda x: x != '\n') + ch = ps.take_char(lambda x: x != EOL) - if ps.is_peek_next_line_comment(level): - content += ps.current() + if ps.is_next_line_comment(skip=False, level=level): + content += ps.current_char ps.next() else: break @@ -217,15 +220,13 @@ def get_comment(self, ps): @with_span def get_group_comment_from_section(self, ps): + def until_closing_bracket_or_eol(ch): + return ch not in (']', EOL) + ps.expect_char('[') ps.expect_char('[') - - ps.skip_inline_ws() - - self.get_variant_name(ps) - - ps.skip_inline_ws() - + while ps.take_char(until_closing_bracket_or_eol): + pass ps.expect_char(']') ps.expect_char(']') @@ -237,20 +238,17 @@ def get_group_comment_from_section(self, ps): def get_message(self, ps): id = self.get_identifier(ps) - ps.skip_inline_ws() + ps.skip_blank_inline() pattern = None # XXX Syntax 0.4 compat - if ps.current_is('='): + if ps.current_char == '=': ps.next() - if ps.is_peek_value_start(): - ps.skip_indent() + if ps.is_value_start(skip=True): pattern = self.get_pattern(ps) - else: - ps.skip_inline_ws() - if ps.is_peek_next_line_attribute_start(): + if ps.is_next_line_attribute_start(skip=True): attrs = self.get_attributes(ps) else: attrs = None @@ -264,16 +262,15 @@ def get_message(self, ps): def get_term(self, ps): id = self.get_term_identifier(ps) - ps.skip_inline_ws() + ps.skip_blank_inline() ps.expect_char('=') - if ps.is_peek_value_start(): - ps.skip_indent() + if ps.is_value_start(skip=True): value = self.get_value(ps) else: raise ParseError('E0006', id.name) - if ps.is_peek_next_line_attribute_start(): + if ps.is_next_line_attribute_start(skip=True): attrs = self.get_attributes(ps) else: attrs = None @@ -286,11 +283,10 @@ def get_attribute(self, ps): key = self.get_identifier(ps) - ps.skip_inline_ws() + ps.skip_blank_inline() ps.expect_char('=') - if ps.is_peek_value_start(): - ps.skip_indent() + if ps.is_value_start(skip=True): value = self.get_pattern(ps) return ast.Attribute(key, value) @@ -300,11 +296,10 @@ def get_attributes(self, ps): attrs = [] while True: - ps.expect_indent() attr = self.get_attribute(ps) attrs.append(attr) - if not ps.is_peek_next_line_attribute_start(): + if not ps.is_next_line_attribute_start(skip=True): break return attrs @@ -325,35 +320,36 @@ def get_term_identifier(self, ps): return ast.Identifier('-{}'.format(id.name)) def get_variant_key(self, ps): - ch = ps.current() + ch = ps.current_char - if ch is None: + if ch is EOF: raise ParseError('E0013') cc = ord(ch) if ((cc >= 48 and cc <= 57) or cc == 45): # 0-9, - return self.get_number(ps) - return self.get_variant_name(ps) + return self.get_identifier(ps) @with_span def get_variant(self, ps, has_default): default_index = False - if ps.current_is('*'): + if ps.current_char == '*': if has_default: raise ParseError('E0015') ps.next() default_index = True ps.expect_char('[') + ps.skip_blank() key = self.get_variant_key(ps) + ps.skip_blank() ps.expect_char(']') - if ps.is_peek_value_start(): - ps.skip_indent() + if ps.is_value_start(skip=True): value = self.get_value(ps) return ast.Variant(key, value, default_index) @@ -364,7 +360,6 @@ def get_variants(self, ps): has_default = False while True: - ps.expect_indent() variant = self.get_variant(ps, has_default) if variant.default: @@ -372,26 +367,16 @@ def get_variants(self, ps): variants.append(variant) - if not ps.is_peek_next_line_variant_start(): + if not ps.is_next_line_variant_start(skip=False): break + ps.skip_blank() + if not has_default: raise ParseError('E0010') return variants - @with_span - def get_variant_name(self, ps): - name = ps.take_id_start() - while True: - ch = ps.take_variant_name_char() - if ch: - name += ch - else: - break - - return ast.VariantName(name.rstrip(' \t\n\r')) - def get_digits(self, ps): num = '' @@ -409,13 +394,13 @@ def get_digits(self, ps): def get_number(self, ps): num = '' - if ps.current_is('-'): + if ps.current_char == '-': num += '-' ps.next() num += self.get_digits(ps) - if ps.current_is('.'): + if ps.current_char == '.': num += '.' ps.next() num += self.get_digits(ps) @@ -424,34 +409,37 @@ def get_number(self, ps): @with_span def get_value(self, ps): - if ps.current_is('{'): + if ps.current_char == '{': ps.peek() - ps.peek_inline_ws() - if ps.is_peek_next_line_variant_start(): + ps.peek_blank_inline() + if ps.is_next_line_variant_start(skip=False): return self.get_variant_list(ps) + ps.reset_peek() return self.get_pattern(ps) @with_span def get_variant_list(self, ps): ps.expect_char('{') - ps.skip_inline_ws() + ps.skip_blank_inline() + ps.expect_line_end() + ps.skip_blank() variants = self.get_variants(ps) - ps.expect_indent() + ps.expect_line_end() + ps.skip_blank() ps.expect_char('}') return ast.VariantList(variants) @with_span def get_pattern(self, ps): elements = [] - ps.skip_inline_ws() - while ps.current(): - ch = ps.current() + while ps.current_char: + ch = ps.current_char # The end condition for get_pattern's while loop is a newline # which is not followed by a valid pattern continuation. - if ch == '\n' and not ps.is_peek_next_line_value(): + if ch == EOL and not ps.is_next_line_value(skip=False): break if ch == '{': @@ -464,6 +452,8 @@ def get_pattern(self, ps): last_element = elements[-1] if isinstance(last_element, ast.TextElement): last_element.value = last_element.value.rstrip(' \t\n\r') + if last_element.value == "": + elements.pop() return ast.Pattern(elements) @@ -471,34 +461,34 @@ def get_pattern(self, ps): def get_text_element(self, ps): buf = '' - while ps.current(): - ch = ps.current() + while ps.current_char: + ch = ps.current_char if ch == '{': return ast.TextElement(buf) - if ch == '\n': - if not ps.is_peek_next_line_value(): + if ch == EOL: + if not ps.is_next_line_value(skip=False): return ast.TextElement(buf) ps.next() - ps.skip_inline_ws() + ps.skip_blank_inline() - # Add the new line to the buffer - buf += ch + buf += EOL continue if ch == '\\': ps.next() buf += self.get_escape_sequence(ps) - else: - buf += ch - ps.next() + continue + + buf += ch + ps.next() return ast.TextElement(buf) def get_escape_sequence(self, ps, specials=('{', '\\')): - next = ps.current() + next = ps.current_char if next in specials: ps.next() @@ -510,8 +500,8 @@ def get_escape_sequence(self, ps, specials=('{', '\\')): for _ in range(4): ch = ps.take_hex_digit() - if ch is None: - raise ParseError('E0026', sequence + ps.current()) + if not ch: + raise ParseError('E0026', sequence + ps.current_char) sequence += ch return '\\u{}'.format(sequence) @@ -527,16 +517,14 @@ def get_placeable(self, ps): @with_span def get_expression(self, ps): - ps.skip_inline_ws() + ps.skip_blank() selector = self.get_selector_expression(ps) - ps.skip_inline_ws() + ps.skip_blank() - if ps.current_is('-'): - ps.peek() - - if not ps.current_peek_is('>'): + if ps.current_char == '-': + if ps.peek() != '>': ps.reset_peek() return selector @@ -553,9 +541,12 @@ def get_expression(self, ps): ps.next() ps.next() - ps.skip_inline_ws() + ps.skip_blank_inline() + ps.expect_line_end() + ps.skip_blank() variants = self.get_variants(ps) + ps.skip_blank() if len(variants) == 0: raise ParseError('E0011') @@ -564,8 +555,6 @@ def get_expression(self, ps): if any(isinstance(v.value, ast.VariantList) for v in variants): raise ParseError('E0023') - ps.expect_indent() - return ast.SelectExpression(selector, variants) elif ( isinstance(selector, ast.AttributeExpression) @@ -573,11 +562,13 @@ def get_expression(self, ps): ): raise ParseError('E0019') + ps.skip_blank() + return selector @with_span def get_selector_expression(self, ps): - if ps.current_is('{'): + if ps.current_char == '{': return self.get_placeable(ps) literal = self.get_literal(ps) @@ -585,7 +576,7 @@ def get_selector_expression(self, ps): if not isinstance(literal, (ast.MessageReference, ast.TermReference)): return literal - ch = ps.current() + ch = ps.current_char if (ch == '.'): ps.next() @@ -623,16 +614,16 @@ def get_selector_expression(self, ps): def get_call_arg(self, ps): exp = self.get_selector_expression(ps) - ps.skip_inline_ws() + ps.skip_blank() - if not ps.current_is(':'): + if ps.current_char != ':': return exp if not isinstance(exp, ast.MessageReference): raise ParseError('E0009') ps.next() - ps.skip_inline_ws() + ps.skip_blank() val = self.get_arg_val(ps) @@ -643,11 +634,10 @@ def get_call_args(self, ps): named = [] argument_names = set() - ps.skip_inline_ws() - ps.skip_indent() + ps.skip_blank() while True: - if ps.current_is(')'): + if ps.current_char == ')': break arg = self.get_call_arg(ps) @@ -661,13 +651,11 @@ def get_call_args(self, ps): else: positional.append(arg) - ps.skip_inline_ws() - ps.skip_indent() + ps.skip_blank() - if ps.current_is(','): + if ps.current_char == ',': ps.next() - ps.skip_inline_ws() - ps.skip_indent() + ps.skip_blank() continue else: break @@ -677,7 +665,7 @@ def get_call_args(self, ps): def get_arg_val(self, ps): if ps.is_number_start(): return self.get_number(ps) - elif ps.current_is('"'): + elif ps.current_char == '"': return self.get_string(ps) raise ParseError('E0012') @@ -687,26 +675,26 @@ def get_string(self, ps): ps.expect_char('"') - ch = ps.take_char(lambda x: x != '"' and x != '\n') + ch = ps.take_char(lambda x: x != '"' and x != EOL) while ch: if ch == '\\': val += self.get_escape_sequence(ps, ('{', '\\', '"')) else: val += ch - ch = ps.take_char(lambda x: x != '"' and x != '\n') + ch = ps.take_char(lambda x: x != '"' and x != EOL) - if ps.current_is('\n'): + if ps.current_char == EOL: raise ParseError('E0020') - ps.next() + ps.expect_char('"') return ast.StringLiteral(val) @with_span def get_literal(self, ps): - ch = ps.current() + ch = ps.current_char - if ch is None: + if ch is EOF: raise ParseError('E0014') if ch == '$': diff --git a/fluent/syntax/serializer.py b/fluent/syntax/serializer.py index 55618891..0ae41069 100644 --- a/fluent/syntax/serializer.py +++ b/fluent/syntax/serializer.py @@ -276,8 +276,8 @@ def serialize_variant_name(symbol): def serialize_variant_key(key): - if isinstance(key, ast.VariantName): - return serialize_variant_name(key) + if isinstance(key, ast.Identifier): + return serialize_identifier(key) if isinstance(key, ast.NumberLiteral): return serialize_number_literal(key) raise Exception('Unknown variant key type: {}'.format(type(key))) diff --git a/fluent/syntax/stream.py b/fluent/syntax/stream.py index b1a1f8cf..aefb24ba 100644 --- a/fluent/syntax/stream.py +++ b/fluent/syntax/stream.py @@ -1,125 +1,332 @@ from __future__ import unicode_literals +from .errors import ParseError -class StringIter(): - def __init__(self, source): - self.source = source - self.len = len(source) - self.i = 0 +class ParserStream(object): + def __init__(self, string): + self.string = string + self.index = 0 + self.peek_offset = 0 + + def get(self, offset): + try: + return self.string[offset] + except IndexError: + return None + + def char_at(self, offset): + # When the cursor is at CRLF, return LF but don't move the cursor. The + # cursor still points to the EOL position, which in this case is the + # beginning of the compound CRLF sequence. This ensures slices of + # [inclusive, exclusive) continue to work properly. + if self.get(offset) == '\r' \ + and self.get(offset + 1) == '\n': + return '\n' + + return self.get(offset) + + @property + def current_char(self): + return self.char_at(self.index) + + @property + def current_peek(self): + return self.char_at(self.index + self.peek_offset) def next(self): - if self.i < self.len: - ret = self.source[self.i] - self.i += 1 - return ret - return None + self.peek_offset = 0 + # Skip over CRLF as if it was a single character. + if self.get(self.index) == '\r' \ + and self.get(self.index + 1) == '\n': + self.index += 1 + self.index += 1 + return self.get(self.index) - def get_slice(self, start, end): - return self.source[start:end] + def peek(self): + # Skip over CRLF as if it was a single character. + if self.get(self.index + self.peek_offset) == '\r' \ + and self.get(self.index + self.peek_offset + 1) == '\n': + self.peek_offset += 1 + self.peek_offset += 1 + return self.get(self.index + self.peek_offset) + def reset_peek(self, offset=0): + self.peek_offset = offset -class ParserStream(): - def __init__(self, string): - self.iter = StringIter(string) - self.buf = [] - self.peek_index = 0 - self.index = 0 + def skip_to_peek(self): + self.index += self.peek_offset + self.peek_offset = 0 - self.ch = None - self.iter_end = False - self.peek_end = False +EOL = '\n' +EOF = None +SPECIAL_LINE_START_CHARS = ('}', '.', '[', '*') - self.ch = self.iter.next() - def next(self): - if self.iter_end: - return None +class FluentParserStream(ParserStream): + last_comment_zero_four_syntax = False - if len(self.buf) == 0: - self.ch = self.iter.next() - else: - self.ch = self.buf.pop(0) + def skip_blank_inline(self): + while self.current_char == ' ': + self.next() - self.index += 1 + def peek_blank_inline(self): + while self.current_peek == ' ': + self.peek() - if self.ch is None: - self.iter_end = True - self.peek_end = True + def skip_blank_block(self): + line_count = 0 + while True: + self.peek_blank_inline() - self.peek_index = self.index + if self.current_peek == EOL: + self.skip_to_peek() + self.next() + line_count += 1 + else: + self.reset_peek() + return line_count - return self.ch + def peek_blank_block(self): + while True: + line_start = self.peek_offset - def current(self): - return self.ch + self.peek_blank_inline() - def current_is(self, ch): - return self.ch == ch + if self.current_peek == EOL: + self.peek() + else: + self.reset_peek(line_start) + break + + def skip_blank(self): + while self.current_char in (" ", EOL): + self.next() + + def peek_blank(self): + while self.current_peek in (" ", EOL): + self.peek() + + def expect_char(self, ch): + if self.current_char == ch: + self.next() + return True + + raise ParseError('E0003', ch) + + def expect_line_end(self): + if self.current_char is EOF: + # EOF is a valid line end in Fluent. + return True + + if self.current_char == EOL: + self.next() + return True + + # Unicode Character 'SYMBOL FOR NEWLINE' (U+2424) + raise ParseError('E0003', '\u2424') + + def take_char(self, f): + ch = self.current_char + if ch is EOF: + return EOF + if f(ch): + self.next() + return ch + return False + + def is_char_id_start(self, ch): + if ch is EOF: + return False - def current_peek(self): - if self.peek_end: - return None + cc = ord(ch) + return (cc >= 97 and cc <= 122) or \ + (cc >= 65 and cc <= 90) - diff = self.peek_index - self.index + def is_identifier_start(self): + return self.is_char_id_start(self.current_peek) - if diff == 0: - return self.ch - return self.buf[diff - 1] + def is_number_start(self): + ch = self.peek() if self.current_char == '-' else self.current_char + if ch is EOF: + self.reset_peek() + return False - def current_peek_is(self, ch): - return self.current_peek() == ch + cc = ord(ch) + is_digit = cc >= 48 and cc <= 57 + self.reset_peek() + return is_digit - def peek(self): - if self.peek_end: - return None + def is_char_pattern_continuation(self, ch): + if ch is EOF: + return False - self.peek_index += 1 + return ch not in SPECIAL_LINE_START_CHARS - diff = self.peek_index - self.index + def is_value_start(self, skip): + if skip is False: + raise NotImplementedError() - if diff > len(self.buf): - ch = self.iter.next() - if ch is not None: - self.buf.append(ch) - else: - self.peek_end = True - return None + self.peek_blank_inline() + ch = self.current_peek - return self.buf[diff - 1] + # Inline Patterns may start with any char. + if ch is not EOF and ch != EOL: + self.skip_to_peek() + return True - def get_index(self): - return self.index + return self.is_next_line_value(skip) - def get_peek_index(self): - return self.peek_index + def is_next_line_zero_four_comment(self, skip): + if skip is True: + raise NotImplementedError() - def peek_char_is(self, ch): - if self.peek_end: + if self.current_peek != EOL: return False - ret = self.peek() + is_comment = (self.peek(), self.peek()) == ('/', '/') + self.reset_peek() + return is_comment - self.peek_index -= 1 + # -1 - any + # 0 - comment + # 1 - group comment + # 2 - resource comment + def is_next_line_comment(self, skip, level=-1): + if skip is True: + raise NotImplementedError() - return ret == ch + if self.current_peek != EOL: + return False - def reset_peek(self, pos=False): - if pos: - if pos < self.peek_index: - self.peek_end = False - self.peek_index = pos - else: - self.peek_index = self.index - self.peek_end = self.iter_end + i = 0 - def skip_to_peek(self): - diff = self.peek_index - self.index + while (i <= level or (level == -1 and i < 3)): + if self.peek() != '#': + if i <= level and level != -1: + self.reset_peek() + return False + break + i += 1 + + # The first char after #, ## or ###. + if self.peek() in (' ', EOL): + self.reset_peek() + return True + + self.reset_peek() + return False + + def is_next_line_variant_start(self, skip): + if skip is True: + raise NotImplementedError() + + if self.current_peek != EOL: + return False + + self.peek_blank() + + if self.current_peek == '*': + self.peek() + + if self.current_peek == '[' and self.peek() != '[': + self.reset_peek() + return True - for i in range(0, diff): - self.ch = self.buf.pop(0) + self.reset_peek() + return False - self.index = self.peek_index + def is_next_line_attribute_start(self, skip): + if skip is False: + raise NotImplementedError() + + self.peek_blank() + + if self.current_peek == '.': + self.skip_to_peek() + return True + + self.reset_peek() + return False + + def is_next_line_value(self, skip): + if self.current_peek != EOL: + return False + + self.peek_blank_block() + + ptr = self.peek_offset + + self.peek_blank_inline() + + if self.current_peek != "{": + if (self.peek_offset - ptr == 0): + self.reset_peek() + return False + + if not self.is_char_pattern_continuation(self.current_peek): + self.reset_peek() + return False + + if skip: + self.skip_to_peek() + else: + self.reset_peek() + + return True + + def skip_to_next_entry_start(self, junk_start): + last_newline = self.string.rfind(EOL, 0, self.index) + if junk_start < last_newline: + # Last seen newline is _after_ the junk start. It's safe to rewind + # without the risk of resuming at the same broken entry. + self.index = last_newline + + while self.current_char: + # We're only interested in beginnings of line. + if self.current_char != EOL: + self.next() + continue + + # Break if the first char in this line looks like an entry start. + first = self.next() + if self.is_char_id_start(first) or first == '-' or first == '#': + break + + # Syntax 0.4 compatibility + peek = self.peek() + self.reset_peek() + if (first, peek) == ('/', '/') or (first, peek) == ('[', '['): + break + + def take_id_start(self): + if self.is_char_id_start(self.current_char): + ret = self.current_char + self.next() + return ret - def get_slice(self, start, end): - return self.iter.get_slice(start, end) + raise ParseError('E0004', 'a-zA-Z') + + def take_id_char(self): + def closure(ch): + cc = ord(ch) + return ((cc >= 97 and cc <= 122) or + (cc >= 65 and cc <= 90) or + (cc >= 48 and cc <= 57) or + cc == 95 or cc == 45) + return self.take_char(closure) + + def take_digit(self): + def closure(ch): + cc = ord(ch) + return (cc >= 48 and cc <= 57) + return self.take_char(closure) + + def take_hex_digit(self): + def closure(ch): + cc = ord(ch) + return ( + (cc >= 48 and cc <= 57) # 0-9 + or (cc >= 65 and cc <= 70) # A-F + or (cc >= 97 and cc <= 102)) # a-f + return self.take_char(closure) diff --git a/tests/syntax/README.md b/tests/syntax/README.md new file mode 100644 index 00000000..0c92ea6d --- /dev/null +++ b/tests/syntax/README.md @@ -0,0 +1,7 @@ +The files in `fixtures_*` are copied from `fluent.js/fluent-syntax`. Due to +the backwards compatibility with Syntax 0.4, the Python parser sometimes +produces different output, mainly in terms of reported errors. Currently, the +files which are known to differ are: + + fixtures_behavior/standalone_identifier.ftl + fixtures_structure/multiline_pattern.ftl diff --git a/tests/syntax/fixtures_behavior/attribute_starts_from_nl.ftl b/tests/syntax/fixtures_behavior/attribute_starts_from_nl.ftl index 5c8191e5..e996c6b5 100644 --- a/tests/syntax/fixtures_behavior/attribute_starts_from_nl.ftl +++ b/tests/syntax/fixtures_behavior/attribute_starts_from_nl.ftl @@ -1,3 +1,2 @@ foo = Value .attr = Value 2 -# ~ERROR E0002, pos 12 diff --git a/tests/syntax/fixtures_behavior/indent.ftl b/tests/syntax/fixtures_behavior/indent.ftl index 92357223..5c386613 100644 --- a/tests/syntax/fixtures_behavior/indent.ftl +++ b/tests/syntax/fixtures_behavior/indent.ftl @@ -3,13 +3,9 @@ key2 = { a } -# ~ERROR E0014, pos 20 -# ~ERROR E0005, pos 23, args "a" key3 = { a } -# ~ERROR E0003, pos 36, args "}" key4 = { { a }} -# ~ERROR E0014, pos 48 diff --git a/tests/syntax/fixtures_behavior/placeable_in_placeable.ftl b/tests/syntax/fixtures_behavior/placeable_in_placeable.ftl index 75d8bcbc..7ece456b 100644 --- a/tests/syntax/fixtures_behavior/placeable_in_placeable.ftl +++ b/tests/syntax/fixtures_behavior/placeable_in_placeable.ftl @@ -8,7 +8,7 @@ key2 = { { foo } } # } key4 = { { foo } -# ~ERROR E0003, pos 93, args "}" +# ~ERROR E0003, pos 96, args "}" -# key5 = { foo } } +key5 = { foo } } diff --git a/tests/syntax/fixtures_behavior/placeable_without_close_bracket.ftl b/tests/syntax/fixtures_behavior/placeable_without_close_bracket.ftl index 517e8ca1..c78e9aa4 100644 --- a/tests/syntax/fixtures_behavior/placeable_without_close_bracket.ftl +++ b/tests/syntax/fixtures_behavior/placeable_without_close_bracket.ftl @@ -1,3 +1,3 @@ key = { $num -# ~ERROR E0003, pos 12, args "}" +# ~ERROR E0003, pos 14, args "}" diff --git a/tests/syntax/fixtures_behavior/second_attribute_starts_from_nl.ftl b/tests/syntax/fixtures_behavior/second_attribute_starts_from_nl.ftl index 0caaa539..9a0d7f1c 100644 --- a/tests/syntax/fixtures_behavior/second_attribute_starts_from_nl.ftl +++ b/tests/syntax/fixtures_behavior/second_attribute_starts_from_nl.ftl @@ -1,4 +1,3 @@ key = Value .label = Value .accesskey = K -# ~ERROR E0002, pos 31 diff --git a/tests/syntax/fixtures_behavior/section_with_no_nl_after_it.ftl b/tests/syntax/fixtures_behavior/section_with_no_nl_after_it.ftl index f11f4c06..a26fcdf2 100644 --- a/tests/syntax/fixtures_behavior/section_with_no_nl_after_it.ftl +++ b/tests/syntax/fixtures_behavior/section_with_no_nl_after_it.ftl @@ -1 +1 @@ -[[ This is a correct section ]] +[[ This is a correct section ]] \ No newline at end of file diff --git a/tests/syntax/fixtures_behavior/selector_expression_ends_abruptly.ftl b/tests/syntax/fixtures_behavior/selector_expression_ends_abruptly.ftl index 00dc742f..a11a1814 100644 --- a/tests/syntax/fixtures_behavior/selector_expression_ends_abruptly.ftl +++ b/tests/syntax/fixtures_behavior/selector_expression_ends_abruptly.ftl @@ -1,2 +1,2 @@ key = { $foo -> -# ~ERROR E0003, pos 16, args " " +# ~ERROR E0003, pos 16, args "[" diff --git a/tests/syntax/fixtures_behavior/unclosed_empty_placeable_error.ftl b/tests/syntax/fixtures_behavior/unclosed_empty_placeable_error.ftl index 093903c7..5f812a94 100644 --- a/tests/syntax/fixtures_behavior/unclosed_empty_placeable_error.ftl +++ b/tests/syntax/fixtures_behavior/unclosed_empty_placeable_error.ftl @@ -1,2 +1,5 @@ -bar = Bar { -# ~ERROR E0014, pos 11 +# ~ERROR E0003, pos 8, args "}" +foo = { +bar = Bar +# ~ERROR E0014, pos 26 +baz = { diff --git a/tests/syntax/fixtures_behavior/variant_ends_abruptly.ftl b/tests/syntax/fixtures_behavior/variant_ends_abruptly.ftl index 04fd2ef7..aaead3d0 100644 --- a/tests/syntax/fixtures_behavior/variant_ends_abruptly.ftl +++ b/tests/syntax/fixtures_behavior/variant_ends_abruptly.ftl @@ -1,3 +1,3 @@ key = { $foo -> *[ -# ~ERROR E0004, pos 22, args "a-zA-Z" +# ~ERROR E0013, pos 23 diff --git a/tests/syntax/fixtures_behavior/variant_lists.ftl b/tests/syntax/fixtures_behavior/variant_lists.ftl index b63088f2..10f5cab3 100644 --- a/tests/syntax/fixtures_behavior/variant_lists.ftl +++ b/tests/syntax/fixtures_behavior/variant_lists.ftl @@ -1,10 +1,10 @@ -# ~ERROR E0014, pos 16 +# ~ERROR E0014, pos 25 message1 = { *[one] One } -# ~ERROR E0023, pos 118 +# ~ERROR E0023, pos 123 message2 = { $sel -> *[one] { @@ -24,7 +24,7 @@ message2 = } } -# ~ERROR E0023, pos 313 +# ~ERROR E0023, pos 318 -term3 = { $sel -> *[one] { diff --git a/tests/syntax/fixtures_behavior/variant_starts_from_nl.ftl b/tests/syntax/fixtures_behavior/variant_starts_from_nl.ftl index 927f7d57..26998864 100644 --- a/tests/syntax/fixtures_behavior/variant_starts_from_nl.ftl +++ b/tests/syntax/fixtures_behavior/variant_starts_from_nl.ftl @@ -1,4 +1,3 @@ -term = { *[one] Value } -# ~ERROR E0014, pos 9 diff --git a/tests/syntax/fixtures_behavior/variant_with_leading_space_in_name.ftl b/tests/syntax/fixtures_behavior/variant_with_leading_space_in_name.ftl index 21a14a08..004f482e 100644 --- a/tests/syntax/fixtures_behavior/variant_with_leading_space_in_name.ftl +++ b/tests/syntax/fixtures_behavior/variant_with_leading_space_in_name.ftl @@ -1,4 +1,3 @@ -term = { *[ one] Foo } -# ~ERROR E0004, pos 20, args "a-zA-Z" diff --git a/tests/syntax/fixtures_behavior/variant_with_symbol_with_space.ftl b/tests/syntax/fixtures_behavior/variant_with_symbol_with_space.ftl index a3a630c1..d8461102 100644 --- a/tests/syntax/fixtures_behavior/variant_with_symbol_with_space.ftl +++ b/tests/syntax/fixtures_behavior/variant_with_symbol_with_space.ftl @@ -1,3 +1,4 @@ +# ~ERROR E0003, pos 24, args "]" -term = { *[New York] Nowy Jork } diff --git a/tests/syntax/fixtures_structure/crlf.ftl b/tests/syntax/fixtures_structure/crlf.ftl new file mode 100644 index 00000000..df3a02c5 --- /dev/null +++ b/tests/syntax/fixtures_structure/crlf.ftl @@ -0,0 +1,14 @@ + +key01 = Value 01 +key02 = + + Value 02 + Continued + + .title = Title + +# ERROR Unclosed StringLiteral +err03 = { "str + +# ERROR Missing newline after ->. +err04 = { $sel -> } diff --git a/tests/syntax/fixtures_structure/crlf.json b/tests/syntax/fixtures_structure/crlf.json new file mode 100644 index 00000000..b72afa33 --- /dev/null +++ b/tests/syntax/fixtures_structure/crlf.json @@ -0,0 +1,187 @@ +{ + "type": "Resource", + "body": [ + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "key01", + "span": { + "type": "Span", + "start": 2, + "end": 7 + } + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "Value 01", + "span": { + "type": "Span", + "start": 10, + "end": 18 + } + } + ], + "span": { + "type": "Span", + "start": 10, + "end": 18 + } + }, + "attributes": [], + "comment": null, + "span": { + "type": "Span", + "start": 2, + "end": 18 + } + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "key02", + "span": { + "type": "Span", + "start": 20, + "end": 25 + } + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "Value 02\nContinued", + "span": { + "type": "Span", + "start": 35, + "end": 58 + } + } + ], + "span": { + "type": "Span", + "start": 35, + "end": 58 + } + }, + "attributes": [ + { + "type": "Attribute", + "id": { + "type": "Identifier", + "name": "title", + "span": { + "type": "Span", + "start": 67, + "end": 72 + } + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "Title", + "span": { + "type": "Span", + "start": 75, + "end": 80 + } + } + ], + "span": { + "type": "Span", + "start": 75, + "end": 80 + } + }, + "span": { + "type": "Span", + "start": 66, + "end": 80 + } + } + ], + "comment": null, + "span": { + "type": "Span", + "start": 20, + "end": 80 + } + }, + { + "type": "Comment", + "content": "ERROR Unclosed StringLiteral", + "span": { + "type": "Span", + "start": 84, + "end": 114 + } + }, + { + "type": "Junk", + "annotations": [ + { + "type": "Annotation", + "code": "E0020", + "args": [], + "message": "Unterminated string expression", + "span": { + "type": "Span", + "start": 130, + "end": 130 + } + } + ], + "content": "err03 = { \"str\r\n\r\n", + "span": { + "type": "Span", + "start": 116, + "end": 134 + } + }, + { + "type": "Comment", + "content": "ERROR Missing newline after ->.", + "span": { + "type": "Span", + "start": 134, + "end": 167 + } + }, + { + "type": "Junk", + "annotations": [ + { + "type": "Annotation", + "code": "E0003", + "args": [ + "␤" + ], + "message": "Expected token: \"␤\"", + "span": { + "type": "Span", + "start": 187, + "end": 187 + } + } + ], + "content": "err04 = { $sel -> }\r\n", + "span": { + "type": "Span", + "start": 169, + "end": 190 + } + } + ], + "span": { + "type": "Span", + "start": 0, + "end": 190 + } +} diff --git a/tests/syntax/fixtures_structure/elements_indent.json b/tests/syntax/fixtures_structure/elements_indent.json index 469903fc..6a23cc27 100644 --- a/tests/syntax/fixtures_structure/elements_indent.json +++ b/tests/syntax/fixtures_structure/elements_indent.json @@ -31,34 +31,49 @@ "end": 9 } }, - "attributes": [], - "comment": null, - "span": { - "type": "Span", - "start": 0, - "end": 9 - } - }, - { - "type": "Junk", - "annotations": [ + "attributes": [ { - "type": "Annotation", - "code": "E0002", - "args": [], - "message": "Expected an entry start", + "type": "Attribute", + "id": { + "type": "Identifier", + "name": "attr", + "span": { + "type": "Span", + "start": 11, + "end": 15 + } + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "Foo Attr", + "span": { + "type": "Span", + "start": 18, + "end": 26 + } + } + ], + "span": { + "type": "Span", + "start": 18, + "end": 26 + } + }, "span": { "type": "Span", "start": 10, - "end": 10 + "end": 26 } } ], - "content": ".attr = Foo Attr\n\n", + "comment": null, "span": { "type": "Span", - "start": 10, - "end": 28 + "start": 0, + "end": 26 } }, { @@ -127,35 +142,49 @@ "start": 42, "end": 61 } - } - ], - "comment": null, - "span": { - "type": "Span", - "start": 28, - "end": 61 - } - }, - { - "type": "Junk", - "annotations": [ + }, { - "type": "Annotation", - "code": "E0002", - "args": [], - "message": "Expected an entry start", + "type": "Attribute", + "id": { + "type": "Identifier", + "name": "attr2", + "span": { + "type": "Span", + "start": 63, + "end": 68 + } + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "Bar Attr 2", + "span": { + "type": "Span", + "start": 71, + "end": 81 + } + } + ], + "span": { + "type": "Span", + "start": 71, + "end": 81 + } + }, "span": { "type": "Span", "start": 62, - "end": 62 + "end": 81 } } ], - "content": ".attr2 = Bar Attr 2\n", + "comment": null, "span": { "type": "Span", - "start": 62, - "end": 82 + "start": 28, + "end": 81 } } ], diff --git a/tests/syntax/fixtures_structure/leading_dots.json b/tests/syntax/fixtures_structure/leading_dots.json index 34a19f38..837427b4 100644 --- a/tests/syntax/fixtures_structure/leading_dots.json +++ b/tests/syntax/fixtures_structure/leading_dots.json @@ -745,7 +745,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "one", "span": { "type": "Span", @@ -782,7 +782,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "other", "span": { "type": "Span", diff --git a/tests/syntax/fixtures_structure/message_with_empty_pattern.json b/tests/syntax/fixtures_structure/message_with_empty_pattern.json index b9246378..eeeb0fa6 100644 --- a/tests/syntax/fixtures_structure/message_with_empty_pattern.json +++ b/tests/syntax/fixtures_structure/message_with_empty_pattern.json @@ -46,8 +46,8 @@ "message": "Expected message \"key2\" to have a value or attributes", "span": { "type": "Span", - "start": 344, - "end": 344 + "start": 343, + "end": 343 } } ], diff --git a/tests/syntax/fixtures_structure/multiline_pattern.ftl b/tests/syntax/fixtures_structure/multiline_pattern.ftl new file mode 100644 index 00000000..41611a04 --- /dev/null +++ b/tests/syntax/fixtures_structure/multiline_pattern.ftl @@ -0,0 +1,20 @@ +key01 = Value + Continued here. + +key02 = + Value + Continued here. + +# ERROR "Continued" looks like a new message. +# key03 parses fine with just "Value". +key03 = + Value +Continued here + and here. + +# ERROR "Continued" and "and" look like new messages +# key04 parses fine with just "Value". +key04 = + Value +Continued here +and even here. diff --git a/tests/syntax/fixtures_structure/multiline_pattern.json b/tests/syntax/fixtures_structure/multiline_pattern.json new file mode 100644 index 00000000..0df2d450 --- /dev/null +++ b/tests/syntax/fixtures_structure/multiline_pattern.json @@ -0,0 +1,250 @@ +{ + "body": [ + { + "comment": null, + "span": { + "start": 0, + "end": 33, + "type": "Span" + }, + "attributes": [], + "type": "Message", + "id": { + "type": "Identifier", + "span": { + "start": 0, + "end": 5, + "type": "Span" + }, + "name": "key01" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "span": { + "start": 8, + "end": 33, + "type": "Span" + }, + "value": "Value\nContinued here." + } + ], + "span": { + "start": 8, + "end": 33, + "type": "Span" + } + } + }, + { + "comment": null, + "span": { + "start": 35, + "end": 72, + "type": "Span" + }, + "attributes": [], + "type": "Message", + "id": { + "type": "Identifier", + "span": { + "start": 35, + "end": 40, + "type": "Span" + }, + "name": "key02" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "span": { + "start": 47, + "end": 72, + "type": "Span" + }, + "value": "Value\nContinued here." + } + ], + "span": { + "start": 47, + "end": 72, + "type": "Span" + } + } + }, + { + "comment": { + "content": "ERROR \"Continued\" looks like a new message.\nkey03 parses fine with just \"Value\".", + "type": "Comment", + "span": { + "start": 74, + "end": 158, + "type": "Span" + } + }, + "span": { + "start": 74, + "end": 176, + "type": "Span" + }, + "attributes": [], + "type": "Message", + "id": { + "type": "Identifier", + "span": { + "start": 159, + "end": 164, + "type": "Span" + }, + "name": "key03" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "span": { + "start": 171, + "end": 176, + "type": "Span" + }, + "value": "Value" + } + ], + "span": { + "start": 171, + "end": 176, + "type": "Span" + } + } + }, + { + "content": "Continued here\n and here.\n\n", + "type": "Junk", + "span": { + "start": 177, + "end": 207, + "type": "Span" + }, + "annotations": [ + { + "type": "Annotation", + "message": "Expected message \"Continued\" to have a value or attributes", + "code": "E0005", + "span": { + "start": 187, + "end": 187, + "type": "Span" + }, + "args": [ + "Continued" + ] + } + ] + }, + { + "comment": { + "content": "ERROR \"Continued\" and \"and\" look like new messages\nkey04 parses fine with just \"Value\".", + "type": "Comment", + "span": { + "start": 207, + "end": 298, + "type": "Span" + } + }, + "span": { + "start": 207, + "end": 316, + "type": "Span" + }, + "attributes": [], + "type": "Message", + "id": { + "type": "Identifier", + "span": { + "start": 299, + "end": 304, + "type": "Span" + }, + "name": "key04" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "span": { + "start": 311, + "end": 316, + "type": "Span" + }, + "value": "Value" + } + ], + "span": { + "start": 311, + "end": 316, + "type": "Span" + } + } + }, + { + "content": "Continued here\n", + "type": "Junk", + "span": { + "start": 317, + "end": 332, + "type": "Span" + }, + "annotations": [ + { + "type": "Annotation", + "message": "Expected message \"Continued\" to have a value or attributes", + "code": "E0005", + "span": { + "start": 327, + "end": 327, + "type": "Span" + }, + "args": [ + "Continued" + ] + } + ] + }, + { + "content": "and even here.\n", + "type": "Junk", + "span": { + "start": 332, + "end": 347, + "type": "Span" + }, + "annotations": [ + { + "type": "Annotation", + "message": "Expected message \"and\" to have a value or attributes", + "code": "E0005", + "span": { + "start": 336, + "end": 336, + "type": "Span" + }, + "args": [ + "and" + ] + } + ] + } + ], + "type": "Resource", + "span": { + "start": 0, + "end": 347, + "type": "Span" + } +} diff --git a/tests/syntax/fixtures_structure/sparse-messages.json b/tests/syntax/fixtures_structure/sparse-messages.json index b58ccaa3..90f8fdb3 100644 --- a/tests/syntax/fixtures_structure/sparse-messages.json +++ b/tests/syntax/fixtures_structure/sparse-messages.json @@ -248,7 +248,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "one", "span": { "type": "Span", @@ -285,7 +285,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "two", "span": { "type": "Span", @@ -364,7 +364,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "one", "span": { "type": "Span", @@ -430,7 +430,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "one", "span": { "type": "Span", diff --git a/tests/syntax/fixtures_structure/term.json b/tests/syntax/fixtures_structure/term.json index 4250e4c6..f9cf94d4 100644 --- a/tests/syntax/fixtures_structure/term.json +++ b/tests/syntax/fixtures_structure/term.json @@ -18,7 +18,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "nominative", "span": { "type": "Span", @@ -55,7 +55,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "accusative", "span": { "type": "Span", @@ -186,7 +186,7 @@ } }, "key": { - "type": "VariantName", + "type": "Identifier", "name": "accusative", "span": { "type": "Span", @@ -286,7 +286,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "masculine", "span": { "type": "Span", @@ -348,7 +348,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "feminine", "span": { "type": "Span", @@ -410,7 +410,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "other", "span": { "type": "Span", diff --git a/tests/syntax/fixtures_structure/unclosed.ftl b/tests/syntax/fixtures_structure/unclosed.ftl new file mode 100644 index 00000000..0fe9fd44 --- /dev/null +++ b/tests/syntax/fixtures_structure/unclosed.ftl @@ -0,0 +1,12 @@ +err01 = { +key02 = Value 02 + +err03 = { +FUNC( +arg +, +namedArg: "Value" +, +key04 = Value 04 +) +} diff --git a/tests/syntax/fixtures_structure/unclosed.json b/tests/syntax/fixtures_structure/unclosed.json new file mode 100644 index 00000000..f492a5e0 --- /dev/null +++ b/tests/syntax/fixtures_structure/unclosed.json @@ -0,0 +1,154 @@ +{ + "type": "Resource", + "body": [ + { + "type": "Junk", + "annotations": [ + { + "type": "Annotation", + "code": "E0003", + "args": [ + "}" + ], + "message": "Expected token: \"}\"", + "span": { + "type": "Span", + "start": 10, + "end": 10 + } + } + ], + "content": "err01 = {\n", + "span": { + "type": "Span", + "start": 0, + "end": 10 + } + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "key02", + "span": { + "type": "Span", + "start": 10, + "end": 15 + } + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "Value 02", + "span": { + "type": "Span", + "start": 18, + "end": 26 + } + } + ], + "span": { + "type": "Span", + "start": 18, + "end": 26 + } + }, + "attributes": [], + "comment": null, + "span": { + "type": "Span", + "start": 10, + "end": 26 + } + }, + { + "type": "Junk", + "annotations": [ + { + "type": "Annotation", + "code": "E0021", + "args": [], + "message": "Positional arguments must not follow named arguments", + "span": { + "type": "Span", + "start": 70, + "end": 70 + } + } + ], + "content": "err03 = {\nFUNC(\narg\n,\nnamedArg: \"Value\"\n,\n", + "span": { + "type": "Span", + "start": 28, + "end": 70 + } + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "key04", + "span": { + "type": "Span", + "start": 70, + "end": 75 + } + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "TextElement", + "value": "Value 04", + "span": { + "type": "Span", + "start": 78, + "end": 86 + } + } + ], + "span": { + "type": "Span", + "start": 78, + "end": 86 + } + }, + "attributes": [], + "comment": null, + "span": { + "type": "Span", + "start": 70, + "end": 86 + } + }, + { + "type": "Junk", + "annotations": [ + { + "type": "Annotation", + "code": "E0002", + "args": [], + "message": "Expected an entry start", + "span": { + "type": "Span", + "start": 87, + "end": 87 + } + } + ], + "content": ")\n}\n", + "span": { + "type": "Span", + "start": 87, + "end": 91 + } + } + ], + "span": { + "type": "Span", + "start": 0, + "end": 91 + } +} diff --git a/tests/syntax/fixtures_structure/variant_with_empty_pattern.json b/tests/syntax/fixtures_structure/variant_with_empty_pattern.json index 007f49f8..19fe137d 100644 --- a/tests/syntax/fixtures_structure/variant_with_empty_pattern.json +++ b/tests/syntax/fixtures_structure/variant_with_empty_pattern.json @@ -32,7 +32,7 @@ { "type": "Variant", "key": { - "type": "VariantName", + "type": "Identifier", "name": "one", "span": { "type": "Span", diff --git a/tests/syntax/test_serializer.py b/tests/syntax/test_serializer.py index 66a7fd58..08570b14 100644 --- a/tests/syntax/test_serializer.py +++ b/tests/syntax/test_serializer.py @@ -269,15 +269,6 @@ def test_variant_multiline_first_inline(self): """ self.assertEqual(self.pretty_ftl(input), dedent_ftl(output)) - def test_variant_key_words(self): - input = """\ - foo = - { $sel -> - *[a b c] A B C - } - """ - self.assertEqual(self.pretty_ftl(input), dedent_ftl(input)) - def test_variant_key_number(self): input = """\ foo = diff --git a/tests/syntax/test_stream.py b/tests/syntax/test_stream.py index cb6a8b96..6c05f0ef 100644 --- a/tests/syntax/test_stream.py +++ b/tests/syntax/test_stream.py @@ -10,91 +10,91 @@ class TestParserStream(unittest.TestCase): def test_next(self): ps = ParserStream("abcd") - self.assertEqual('a', ps.current()) - self.assertEqual(0, ps.get_index()) + self.assertEqual('a', ps.current_char) + self.assertEqual(0, ps.index) self.assertEqual('b', ps.next()) - self.assertEqual('b', ps.current()) - self.assertEqual(1, ps.get_index()) + self.assertEqual('b', ps.current_char) + self.assertEqual(1, ps.index) self.assertEqual('c', ps.next()) - self.assertEqual('c', ps.current()) - self.assertEqual(2, ps.get_index()) + self.assertEqual('c', ps.current_char) + self.assertEqual(2, ps.index) self.assertEqual('d', ps.next()) - self.assertEqual('d', ps.current()) - self.assertEqual(3, ps.get_index()) + self.assertEqual('d', ps.current_char) + self.assertEqual(3, ps.index) self.assertEqual(None, ps.next()) - self.assertEqual(None, ps.current()) - self.assertEqual(4, ps.get_index()) + self.assertEqual(None, ps.current_char) + self.assertEqual(4, ps.index) def test_peek(self): ps = ParserStream("abcd") - self.assertEqual('a', ps.current_peek()) - self.assertEqual(0, ps.get_peek_index()) + self.assertEqual('a', ps.current_peek) + self.assertEqual(0, ps.peek_offset) self.assertEqual('b', ps.peek()) - self.assertEqual('b', ps.current_peek()) - self.assertEqual(1, ps.get_peek_index()) + self.assertEqual('b', ps.current_peek) + self.assertEqual(1, ps.peek_offset) self.assertEqual('c', ps.peek()) - self.assertEqual('c', ps.current_peek()) - self.assertEqual(2, ps.get_peek_index()) + self.assertEqual('c', ps.current_peek) + self.assertEqual(2, ps.peek_offset) self.assertEqual('d', ps.peek()) - self.assertEqual('d', ps.current_peek()) - self.assertEqual(3, ps.get_peek_index()) + self.assertEqual('d', ps.current_peek) + self.assertEqual(3, ps.peek_offset) self.assertEqual(None, ps.peek()) - self.assertEqual(None, ps.current_peek()) - self.assertEqual(4, ps.get_peek_index()) + self.assertEqual(None, ps.current_peek) + self.assertEqual(4, ps.peek_offset) def test_peek_and_next(self): ps = ParserStream("abcd") self.assertEqual('b', ps.peek()) - self.assertEqual(1, ps.get_peek_index()) - self.assertEqual(0, ps.get_index()) + self.assertEqual(1, ps.peek_offset) + self.assertEqual(0, ps.index) self.assertEqual('b', ps.next()) - self.assertEqual(1, ps.get_peek_index()) - self.assertEqual(1, ps.get_index()) + self.assertEqual(0, ps.peek_offset) + self.assertEqual(1, ps.index) self.assertEqual('c', ps.peek()) - self.assertEqual(2, ps.get_peek_index()) - self.assertEqual(1, ps.get_index()) + self.assertEqual(1, ps.peek_offset) + self.assertEqual(1, ps.index) self.assertEqual('c', ps.next()) - self.assertEqual(2, ps.get_peek_index()) - self.assertEqual(2, ps.get_index()) - self.assertEqual('c', ps.current()) - self.assertEqual('c', ps.current_peek()) + self.assertEqual(0, ps.peek_offset) + self.assertEqual(2, ps.index) + self.assertEqual('c', ps.current_char) + self.assertEqual('c', ps.current_peek) self.assertEqual('d', ps.peek()) - self.assertEqual(3, ps.get_peek_index()) - self.assertEqual(2, ps.get_index()) + self.assertEqual(1, ps.peek_offset) + self.assertEqual(2, ps.index) self.assertEqual('d', ps.next()) - self.assertEqual(3, ps.get_peek_index()) - self.assertEqual(3, ps.get_index()) - self.assertEqual('d', ps.current()) - self.assertEqual('d', ps.current_peek()) + self.assertEqual(0, ps.peek_offset) + self.assertEqual(3, ps.index) + self.assertEqual('d', ps.current_char) + self.assertEqual('d', ps.current_peek) self.assertEqual(None, ps.peek()) - self.assertEqual(4, ps.get_peek_index()) - self.assertEqual(3, ps.get_index()) - self.assertEqual('d', ps.current()) - self.assertEqual(None, ps.current_peek()) + self.assertEqual(1, ps.peek_offset) + self.assertEqual(3, ps.index) + self.assertEqual('d', ps.current_char) + self.assertEqual(None, ps.current_peek) self.assertEqual(None, ps.peek()) - self.assertEqual(4, ps.get_peek_index()) - self.assertEqual(3, ps.get_index()) + self.assertEqual(2, ps.peek_offset) + self.assertEqual(3, ps.index) self.assertEqual(None, ps.next()) - self.assertEqual(4, ps.get_peek_index()) - self.assertEqual(4, ps.get_index()) + self.assertEqual(0, ps.peek_offset) + self.assertEqual(4, ps.index) def test_skip_to_peek(self): ps = ParserStream("abcd") @@ -104,24 +104,24 @@ def test_skip_to_peek(self): ps.skip_to_peek() - self.assertEqual('c', ps.current()) - self.assertEqual('c', ps.current_peek()) - self.assertEqual(2, ps.get_peek_index()) - self.assertEqual(2, ps.get_index()) + self.assertEqual('c', ps.current_char) + self.assertEqual('c', ps.current_peek) + self.assertEqual(0, ps.peek_offset) + self.assertEqual(2, ps.index) ps.peek() - self.assertEqual('c', ps.current()) - self.assertEqual('d', ps.current_peek()) - self.assertEqual(3, ps.get_peek_index()) - self.assertEqual(2, ps.get_index()) + self.assertEqual('c', ps.current_char) + self.assertEqual('d', ps.current_peek) + self.assertEqual(1, ps.peek_offset) + self.assertEqual(2, ps.index) ps.next() - self.assertEqual('d', ps.current()) - self.assertEqual('d', ps.current_peek()) - self.assertEqual(3, ps.get_peek_index()) - self.assertEqual(3, ps.get_index()) + self.assertEqual('d', ps.current_char) + self.assertEqual('d', ps.current_peek) + self.assertEqual(0, ps.peek_offset) + self.assertEqual(3, ps.index) def test_reset_peek(self): ps = ParserStream("abcd") @@ -131,53 +131,37 @@ def test_reset_peek(self): ps.peek() ps.reset_peek() - self.assertEqual('b', ps.current()) - self.assertEqual('b', ps.current_peek()) - self.assertEqual(1, ps.get_peek_index()) - self.assertEqual(1, ps.get_index()) + self.assertEqual('b', ps.current_char) + self.assertEqual('b', ps.current_peek) + self.assertEqual(0, ps.peek_offset) + self.assertEqual(1, ps.index) ps.peek() - self.assertEqual('b', ps.current()) - self.assertEqual('c', ps.current_peek()) - self.assertEqual(2, ps.get_peek_index()) - self.assertEqual(1, ps.get_index()) + self.assertEqual('b', ps.current_char) + self.assertEqual('c', ps.current_peek) + self.assertEqual(1, ps.peek_offset) + self.assertEqual(1, ps.index) ps.peek() ps.peek() ps.peek() ps.reset_peek() - self.assertEqual('b', ps.current()) - self.assertEqual('b', ps.current_peek()) - self.assertEqual(1, ps.get_peek_index()) - self.assertEqual(1, ps.get_index()) + self.assertEqual('b', ps.current_char) + self.assertEqual('b', ps.current_peek) + self.assertEqual(0, ps.peek_offset) + self.assertEqual(1, ps.index) self.assertEqual('c', ps.peek()) - self.assertEqual('b', ps.current()) - self.assertEqual('c', ps.current_peek()) - self.assertEqual(2, ps.get_peek_index()) - self.assertEqual(1, ps.get_index()) + self.assertEqual('b', ps.current_char) + self.assertEqual('c', ps.current_peek) + self.assertEqual(1, ps.peek_offset) + self.assertEqual(1, ps.index) self.assertEqual('d', ps.peek()) self.assertEqual(None, ps.peek()) - def test_reset_peek(self): - ps = ParserStream("abcd") - - ps.next() - ps.peek() - - self.assertEqual(ps.peek_char_is('d'), True) - - self.assertEqual('b', ps.current()) - self.assertEqual('c', ps.current_peek()) - - ps.skip_to_peek() - - self.assertEqual('c', ps.current()) - - if __name__ == '__main__': unittest.main()