From a8cb885302cf1d77187b9bd3ecb824d8a85925ab Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Mon, 10 Apr 2023 15:32:41 +0200 Subject: [PATCH 01/12] Update message.abnf This proposal reserves sigils for future use, with reserved sigils allowed in the same places where function names are currently allowed, e.g. standalone (`{%foo}`), with options (`{%foo key=val}`), or subsidiary to variable/literals (`{|quote me| %foo}` or `{$var %foo}`). A case could be made for reserving a comment syntax that allows a placeholder with prose in it (and I would suggest using the `%` for it). @eemeli's proposal is effectively this. I didn't propose this because of the impact on parsers in the future. Let's discuss. --- spec/message.abnf | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 66512a3189..dc09c99ff1 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -11,12 +11,10 @@ variant = when 1*(s key) [s] pattern key = nmtoken / literal / "*" placeholder = "{" [s] expression [s] "}" - / "{" [s] markup-start *(s option) [s] "}" - / "{" [s] markup-end [s] "}" expression = ((literal / variable) [s annotation]) / annotation -annotation = function *(s option) +annotation = (function / markup / reserved) *(s option) option = name [s] "=" [s] (literal / nmtoken / variable) ; reserved keywords are always lowercase @@ -39,8 +37,8 @@ literal-char = %x0-5B ; omit \ variable = "$" name function = ":" name -markup-start = "+" name -markup-end = "-" name +markup = ("+" / "-") name +reserved = ("!" / "@" / "#" / "$" / "%" / "^" / "&" / "*" / "<" / ">" / "?") name name = name-start *name-char ; matches XML https://www.w3.org/TR/xml/#NT-Name nmtoken = 1*name-char ; matches XML https://www.w3.org/TR/xml/#NT-Nmtokens From 0e7454a69e2a46e7ce619ed11487462e52a68408 Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Sat, 15 Apr 2023 12:25:56 +0200 Subject: [PATCH 02/12] Update spec/message.abnf I think this works.
I'm going to commit it to the PR and then do some additional cleanup. Co-authored-by: Eemeli Aro --- spec/message.abnf | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 0b383de68d..77d30a804a 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -35,8 +35,14 @@ literal-char = %x0-5B ; omit \ variable = "$" name ; reserve additional sigils for future use -reserved = reserved-start text -reserved-start = ("!" / "@" / "#" / "%" / "^" / "&" / "*" / "<" / ">" / "?" / "~") +reserved = reserved-start reserved-body +reserved-start = "!" / "@" / "#" / "%" / "^" / "&" / "*" / "<" / ">" / "?" / "~" +reserved-body = *(reserved-char / reserved-escape / literal) +reserved-char = %x0-5B ; omit \ + / %x5D-7A ; omit { | } + / %x7E-D7FF ; omit surrogates + / %xE000-10FFFF +reserved-escape = backslash ( backslash / "{" / "|" / "}" ) function = (":" | "+" | "-") name From 60dc2eec43abc94cecb8436f5db91a8ad2f4e800 Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Sat, 15 Apr 2023 12:30:51 +0200 Subject: [PATCH 03/12] Cleanup - align some items - move `function` with `variable` - move `reserve-escape` with the other escapes --- spec/message.abnf | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 77d30a804a..2165e16b88 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -26,25 +26,23 @@ text-char = %x0-5B ; omit \ / %x7E-D7FF ; omit surrogates / %xE000-10FFFF -literal = "|" *(literal-char / literal-escape) "|" +literal = "|" *(literal-char / literal-escape) "|" literal-char = %x0-5B ; omit \ / %x5D-7B ; omit | / %x7D-D7FF ; omit surrogates / %xE000-10FFFF variable = "$" name +function = (":" | "+" | "-") name ; reserve additional sigils for future use -reserved = reserved-start reserved-body +reserved = reserved-start reserved-body reserved-start = "!" / "@" / "#" / "%" / "^" / "&" / "*" / "<" / ">" / "?" 
/ "~" -reserved-body = *(reserved-char / reserved-escape / literal) -reserved-char = %x0-5B ; omit \ - / %x5D-7A ; omit { | } - / %x7E-D7FF ; omit surrogates - / %xE000-10FFFF -reserved-escape = backslash ( backslash / "{" / "|" / "}" ) - -function = (":" | "+" | "-") name +reserved-body = *(reserved-char / reserved-escape / literal) +reserved-char = %x0-5B ; omit \ + / %x5D-7A ; omit { | } + / %x7E-D7FF ; omit surrogates + / %xE000-10FFFF name = name-start *name-char ; matches XML https://www.w3.org/TR/xml/#NT-Name nmtoken = 1*name-char ; matches XML https://www.w3.org/TR/xml/#NT-Nmtokens @@ -56,8 +54,9 @@ name-start = ALPHA / "_" name-char = name-start / DIGIT / "-" / "." / %xB7 / %x0300-036F / %x203F-2040 -text-escape = backslash ( backslash / "{" / "}" ) -literal-escape = backslash ( backslash / "|" ) -backslash = %x5C ; U+005C REVERSE SOLIDUS "\" +text-escape = backslash ( backslash / "{" / "}" ) +literal-escape = backslash ( backslash / "|" ) +reserved-escape = backslash ( backslash / "{" / "|" / "|" ) +backslash = %x5C ; U+005C REVERSE SOLIDUS "\" s = 1*( SP / HTAB / CR / LF ) From aef7a279c43b52381abd978c8becb79073fc3aca Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Sat, 15 Apr 2023 12:31:57 +0200 Subject: [PATCH 04/12] Fix typo --- spec/message.abnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/message.abnf b/spec/message.abnf index 2165e16b88..223bf7ba22 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -56,7 +56,7 @@ name-char = name-start / DIGIT / "-" / "." 
/ %xB7 text-escape = backslash ( backslash / "{" / "}" ) literal-escape = backslash ( backslash / "|" ) -reserved-escape = backslash ( backslash / "{" / "|" / "|" ) +reserved-escape = backslash ( backslash / "{" / "|" / "}" ) backslash = %x5C ; U+005C REVERSE SOLIDUS "\" s = 1*( SP / HTAB / CR / LF ) From 4a7315862935e7571d2573611179d5d6cd587b05 Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Sat, 15 Apr 2023 12:51:09 +0200 Subject: [PATCH 05/12] Add `reserved` to the spec Note that this change also removed the undefined term "operand" in favor of just using literal and variable (which are defined terms). It also imposes 2119 keywords. --- spec/syntax.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index f99feb0509..09b74c856a 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -345,19 +345,18 @@ Whitespace within a _pattern_ is meaningful and MUST be preserved. ### Expressions -**_Expressions_** can either start with an operand or a function call. +_Expressions_ ***must*** start with a _literal_, a _variable_, or an _annotation_, or consist of a _reserved_ string. An _expression_ ***must not*** be empty. -The operand is a literal or a variable name. -The operand can be optionally followed by an _annotation_: -a function and its named options. -Functions do not accept any positional arguments -other than the operand in front of them. +A _literal_ or _variable_ ***may*** be optionally followed by an _annotation_. -Function calls do not require an operand as an argument, -but an _expression_ must not be completely empty. +An _annotation_ consists of a _function_ and its named _options_. + +_Functions_ do not accept any positional arguments other than the _literal_ or _variable_ in front of them. + +_Reserved_ sequences start with a reserved character and are intended for future standardization. 
```abnf -expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" +expression = "{" [s] (((literal / variable) [s annotation]) / annotation / reserved) [s] "}" annotation = function *(s option) option = name [s] "=" [s] (literal / nmtoken / variable) ``` From a8b8b4838e17acc8c94b3b520f88e7f3fefb546a Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Sun, 16 Apr 2023 12:13:30 +0200 Subject: [PATCH 06/12] allow passing values to reserved --- spec/message.abnf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 223bf7ba22..89c227f965 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -9,8 +9,8 @@ selectors = match 1*([s] expression) variant = when 1*(s key) [s] pattern key = nmtoken / literal / "*" -expression = "{" [s] (((literal / variable) [s annotation]) / annotation / reserved) [s] "}" -annotation = function *(s option) +expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" +annotation = (function *(s option)) / reserved option = name [s] "=" [s] (literal / nmtoken / variable) From 0df07f49b4696f3a6d2dd7a7a2b6673f63cb2398 Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Sun, 16 Apr 2023 12:27:39 +0200 Subject: [PATCH 07/12] Add warning about whitespace significance in `reserved` --- spec/syntax.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index 09b74c856a..e823853346 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -317,7 +317,7 @@ A _well-formed_ message is considered _valid_ if the following requirements are ### Patterns A **_pattern_** is a sequence of translatable elements. -Patterns MUST BE delimited with `{` at the start, and `}` at the end. +Patterns MUST be delimited with `{` at the start, and `}` at the end. 
This serves 3 purposes: - The message can be unambiguously embeddable in various container formats @@ -356,8 +356,8 @@ _Functions_ do not accept any positional arguments other than the _literal_ or _ _Reserved_ sequences start with a reserved character and are intended for future standardization. ```abnf -expression = "{" [s] (((literal / variable) [s annotation]) / annotation / reserved) [s] "}" -annotation = function *(s option) +expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" +annotation = (function *(s option)) / reserved option = name [s] "=" [s] (literal / nmtoken / variable) ``` @@ -477,12 +477,12 @@ name-char = name-start / DIGIT / "-" / "." / %xB7 ### Escape Sequences -Escape sequences are introduced by the backslash character (`\`). -They are allowed in translatable text as well as in literals. +Escape sequences are introduced by the backslash character (`\`) and allow the appearance of lexically meaningful characters in the body of `text`, `literal`, or `reserved` sequences respectively: ```abnf text-escape = backslash ( backslash / "{" / "}" ) literal-escape = backslash ( backslash / "|" ) +reserved-escape = backslash ( backslash / "{" / "|" / "}" ) backslash = %x5C ; U+005C REVERSE SOLIDUS "\" ``` @@ -494,6 +494,8 @@ Inside _patterns_, whitespace is part of the translatable content and is recorded and stored verbatim. Whitespace is not significant outside translatable text, except where required by the syntax. +***NOTE:*** Whitespace **_is_** significant in the `reserved` production and implementations need to be careful not to trim trailing whitespace from reserved sequences.
+ ```abnf s = 1*( SP / HTAB / CR / LF ) ``` From a3dcef40475914b247632a7fd6562409790a5803 Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Mon, 17 Apr 2023 12:26:19 +0200 Subject: [PATCH 08/12] Fix expression links, add more about reserved --- spec/syntax.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index e823853346..28bdebf8b3 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -7,7 +7,7 @@ 1. [Design Restrictions](#design-restrictions) 1. [Overview & Examples](#overview--examples) 1. [Messages](#messages) - 1. [Expressions](#expressions) + 1. [Expressions](#placeholder-expressions) 1. [Formatting Functions](#formatting-functions) 1. [Selection](#selection) 1. [Local Variables](#local-variables) @@ -19,6 +19,7 @@ 1. [Variants](#variants) 1. [Patterns](#patterns) 1. [Expressions](#expressions) + 1. [Reserved Sequences](#reserved) 1. [Tokens](#tokens) 1. [Keywords](#keywords) 1. [Text and Literals](#text-and-literals) @@ -108,7 +109,7 @@ let hello = new MessageFormat('{Hello, world!}') hello.format() ``` -### Expressions +### Placeholder Expressions An _expression_ represents a part of a message that will be determined during the message's formatting. @@ -353,8 +354,6 @@ An _annotation_ consists of a _function_ and its named _options_. _Functions_ do not accept any positional arguments other than the _literal_ or _variable_ in front of them. -_Reserved_ sequences start with a reserved character and are intended for future standardization. - ```abnf expression = "{" [s] (((literal / variable) [s annotation]) / annotation) [s] "}" annotation = (function *(s option)) / reserved @@ -397,6 +396,10 @@ Message examples: {{+h1 name=above-and-beyond}Above And Beyond{-h1}} ``` +#### Reserved + +_Reserved_ sequences start with a reserved character and are intended for future standardization. A reserved sequence can contain arbitrary text in which whitespace _is_ significant. 
The reserved sequence terminates with the end of the expression in which it appears. While a reserved sequence is technically "well-formed", unrecognized reserved sequences have no meaning and implementations **_may_** emit an error when encountered during formatting. + ## Tokens The grammar defines the following tokens for the purpose of the lexical analysis. From da35503e0197c000d25b6b456d3b718ca2753627 Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Tue, 18 Apr 2023 10:37:02 +0200 Subject: [PATCH 09/12] Address comments --- spec/syntax.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/spec/syntax.md b/spec/syntax.md index 28bdebf8b3..4e19bb9c0d 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -7,7 +7,7 @@ 1. [Design Restrictions](#design-restrictions) 1. [Overview & Examples](#overview--examples) 1. [Messages](#messages) - 1. [Expressions](#placeholder-expressions) + 1. [Expressions](#expression) 1. [Formatting Functions](#formatting-functions) 1. [Selection](#selection) 1. [Local Variables](#local-variables) @@ -109,7 +109,7 @@ let hello = new MessageFormat('{Hello, world!}') hello.format() ``` -### Placeholder Expressions +### Expression An _expression_ represents a part of a message that will be determined during the message's formatting. @@ -346,11 +346,11 @@ Whitespace within a _pattern_ is meaningful and MUST be preserved. ### Expressions -_Expressions_ ***must*** start with a _literal_, a _variable_, or an _annotation_, or consist of a _reserved_ string. An _expression_ ***must not*** be empty. +_Expressions_ ***must*** start with a _literal_, a _variable_, or an _annotation_. An _expression_ ***must not*** be empty. A _literal_ or _variable_ ***may*** be optionally followed by an _annotation_. -An _annotation_ consists of a _function_ and its named _options_. +An _annotation_ consists of a _function_ and its named _options_, or consists of a _reserved_ sequence. 
_Functions_ do not accept any positional arguments other than the _literal_ or _variable_ in front of them. @@ -398,7 +398,10 @@ Message examples: #### Reserved -_Reserved_ sequences start with a reserved character and are intended for future standardization. A reserved sequence can contain arbitrary text in which whitespace _is_ significant. The reserved sequence terminates with the end of the expression in which it appears. While a reserved sequence is technically "well-formed", unrecognized reserved sequences have no meaning and implementations **_may_** emit an error when encountered during formatting. +_Reserved_ sequences start with a reserved character and are intended for future standardization. +A reserved sequence can be empty or contain arbitrary text. +A reserved sequence does not include any trailing whitespace. +While a reserved sequence is technically "well-formed", unrecognized reserved sequences have no meaning and might result in errors during formatting. ## Tokens From 0d61ad4ded6a8f9bd5a58f7bdee15ee2bd60b5fd Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Tue, 18 Apr 2023 11:01:48 +0200 Subject: [PATCH 10/12] Make spaces insignificant at the end of reserved sequences --- spec/message.abnf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/spec/message.abnf b/spec/message.abnf index 89c227f965..03e4ec1af1 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -38,8 +38,11 @@ function = (":" | "+" | "-") name ; reserve additional sigils for future use reserved = reserved-start reserved-body reserved-start = "!" / "@" / "#" / "%" / "^" / "&" / "*" / "<" / ">" / "?" 
/ "~" -reserved-body = *(reserved-char / reserved-escape / literal) -reserved-char = %x0-5B ; omit \ +reserved-body = *( [s] *(reserved-char / reserved-escape / literal)) +reserved-char = %x00-08 ; omit HTAB and LF + / %x0B-0C ; omit CR + / %x0E-1F ; omit SP + / %x21-5B ; omit \ / %x5D-7A ; omit { | } / %x7E-D7FF ; omit surrogates / %xE000-10FFFF From 510f8728a707870fba478a71a7ca91f0c87c6457 Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Tue, 18 Apr 2023 11:07:10 +0200 Subject: [PATCH 11/12] Remove the warning about trailing whitespace on reserved --- spec/syntax.md | 1 - 1 file changed, 1 deletion(-) diff --git a/spec/syntax.md b/spec/syntax.md index 4e19bb9c0d..f3f552ebcc 100644 --- a/spec/syntax.md +++ b/spec/syntax.md @@ -500,7 +500,6 @@ Inside _patterns_, whitespace is part of the translatable content and is recorded and stored verbatim. Whitespace is not significant outside translatable text, except where required by the syntax. -***NOTE:*** Whitespace **_is_** significant in the `reserved` production and implementations need to be careful not to trim trailing whitespace from reserved sequences. ```abnf s = 1*( SP / HTAB / CR / LF ) From 27bf944ffb7184ec308fe79c92ba43f23871914b Mon Sep 17 00:00:00 2001 From: Addison Phillips Date: Tue, 18 Apr 2023 11:10:16 +0200 Subject: [PATCH 12/12] Fix reserved to ensure no ending whitespace --- spec/message.abnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/message.abnf b/spec/message.abnf index 03e4ec1af1..32cd24c942 100644 --- a/spec/message.abnf +++ b/spec/message.abnf @@ -38,7 +38,7 @@ function = (":" | "+" | "-") name ; reserve additional sigils for future use reserved = reserved-start reserved-body reserved-start = "!" / "@" / "#" / "%" / "^" / "&" / "*" / "<" / ">" / "?"
/ "~" -reserved-body = *( [s] *(reserved-char / reserved-escape / literal)) +reserved-body = *( [s] 1*(reserved-char / reserved-escape / literal)) reserved-char = %x00-08 ; omit HTAB and LF / %x0B-0C ; omit CR / %x0E-1F ; omit SP