From 5978487ce98983fe968004fb4a173bfffbcd0c21 Mon Sep 17 00:00:00 2001 From: Dmitry Khalanskiy Date: Tue, 5 Mar 2024 11:45:08 +0100 Subject: [PATCH 1/3] Fix not being able to parse some separator-free formats The reproducer was: ``` offsetHours(Padding.NONE) optional { optional { char(':') } offsetMinutesOfHour() } ``` This showed us one bug and one inefficiency, both of which are fixed here. Inefficiency: the `optional { char(':') }` should for all intents and purposes be equivalent to `alternativeParsing({ char(':') }) { }`: both the empty string and the `:` should be parsed, and, according to `optional`'s definition, if none of the fields mentioned in the block are non-zero, nothing should be output. However, such `optional` still created a fake parser element that notified all the fields that they are zero when an empty string is parsed, even though there are no fields. Bug: the fake parser element that notifies the fields that they are zero even though nothing of note was parsed interfered with the mechanism supporting compact formats. Number parsers are greedy, so `number(hh); number(mm)` gets merged into `number(hhmm)`. If there is something between them, this merge can't happen: `number(hh); string("x"); number(mm)`. For this reason, parsers that don't accept any strings are forbidden (or just very unlikely to happen)--except for the zero-width unconditional modification parser. This bug is fixed by moving such parsers to the end of the parser: `number(hh); modification(); number(mm); string("x")` will get transformed to `number(hhmm); string("x"); modification()`. --- .../src/internal/format/FormatStructure.kt | 30 ++++++++++++------- .../src/internal/format/parser/Parser.kt | 15 +++++++--- core/common/test/format/DateTimeFormatTest.kt | 12 ++++++++ 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/core/common/src/internal/format/FormatStructure.kt b/core/common/src/internal/format/FormatStructure.kt index 495d791e1..64644e8c2 100644 --- a/core/common/src/internal/format/FormatStructure.kt +++ b/core/common/src/internal/format/FormatStructure.kt @@ -160,13 +160,17 @@ internal class OptionalFormatStructure( listOf( ConstantFormatStructure(onZero).parser(), ParserStructure( - listOf( - UnconditionalModification { - for (field in fields) { - field.assignDefault(it) + if (fields.isEmpty()) { + emptyList() + } else { + listOf( + UnconditionalModification { + for (field in fields) { + field.assignDefault(it) + } } - } - ), + ) + }, emptyList() ) ).concat() @@ -176,12 +180,16 @@ internal class OptionalFormatStructure( override fun formatter(): FormatterStructure { val formatter = format.formatter() val predicate = conjunctionPredicate(fields.map { it.isDefaultComparisonPredicate() }) - return ConditionalFormatter( - listOf( - predicate::test to ConstantStringFormatterStructure(onZero), - Truth::test to formatter + return if (predicate is Truth) { + ConstantStringFormatterStructure(onZero) + } else { + ConditionalFormatter( + listOf( + predicate::test to ConstantStringFormatterStructure(onZero), + Truth::test to formatter + ) ) - ) + } } private class PropertyWithDefault private constructor( diff --git a/core/common/src/internal/format/parser/Parser.kt b/core/common/src/internal/format/parser/Parser.kt index 24a1d444d..9958e3fb9 100644 --- a/core/common/src/internal/format/parser/Parser.kt +++ b/core/common/src/internal/format/parser/Parser.kt @@ -49,10 +49,12 @@ internal fun List>.concat(): ParserStructure { ParserStructure(operations, followedBy.map { it.append(other) }) } - fun ParserStructure.simplify(): ParserStructure { + fun ParserStructure.simplify(unconditionalModifications: List>): ParserStructure { val newOperations = mutableListOf>() var currentNumberSpan: MutableList>? = null - // joining together the number consumers in this parser before the first alternative + val unconditionalModificationsForTails = unconditionalModifications.toMutableList() + // joining together the number consumers in this parser before the first alternative; + // collecting the unconditional modifications to push them to the end of all the parser's branches. for (op in operations) { if (op is NumberSpanParserOperation) { if (currentNumberSpan != null) { @@ -60,6 +62,8 @@ internal fun List>.concat(): ParserStructure { } else { currentNumberSpan = op.consumers.toMutableList() } + } else if (op is UnconditionalModification) { + unconditionalModificationsForTails.add(op) } else { if (currentNumberSpan != null) { newOperations.add(NumberSpanParserOperation(currentNumberSpan)) @@ -69,7 +73,7 @@ internal fun List>.concat(): ParserStructure { } } val mergedTails = followedBy.flatMap { - val simplified = it.simplify() + val simplified = it.simplify(unconditionalModificationsForTails) // parser `ParserStructure(emptyList(), p)` is equivalent to `p`, // unless `p` is empty. For example, ((a|b)|(c|d)) is equivalent to (a|b|c|d). // As a special case, `ParserStructure(emptyList(), emptyList())` represents a parser that recognizes an empty @@ -78,6 +82,9 @@ internal fun List>.concat(): ParserStructure { simplified.followedBy.ifEmpty { listOf(simplified) } else listOf(simplified) + }.ifEmpty { + // preserving the invariant that `mergedTails` contains all unconditional modifications + listOf(ParserStructure(unconditionalModificationsForTails, emptyList())) } return if (currentNumberSpan == null) { // the last operation was not a number span, or it was a number span that we are allowed to interrupt @@ -115,7 +122,7 @@ internal fun List>.concat(): ParserStructure { } } val naiveParser = foldRight(ParserStructure(emptyList(), emptyList())) { parser, acc -> parser.append(acc) } - return naiveParser.simplify() + return naiveParser.simplify(emptyList()) } internal interface Copyable { diff --git a/core/common/test/format/DateTimeFormatTest.kt b/core/common/test/format/DateTimeFormatTest.kt index f423a1654..d3e90f9ac 100644 --- a/core/common/test/format/DateTimeFormatTest.kt +++ b/core/common/test/format/DateTimeFormatTest.kt @@ -137,6 +137,18 @@ class DateTimeFormatTest { } } } + + @Test + fun testOptionalBetweenConsecutiveNumbers() { + val format = UtcOffset.Format { + offsetHours(Padding.NONE) + optional { + optional { offsetSecondsOfMinute() } + offsetMinutesOfHour() + } + } + assertEquals(UtcOffset(-7, -30), format.parse("-730")) + } } fun DateTimeFormat.assertCanNotParse(input: String) { From 420a0346e141eea45807401f1bfe2158d4b7bb2d Mon Sep 17 00:00:00 2001 From: Dmitry Khalanskiy Date: Tue, 5 Mar 2024 12:03:11 +0100 Subject: [PATCH 2/3] Forbid empty strings in month and day-of-week names and AM/PM markers --- core/common/src/format/DateTimeFormatBuilder.kt | 2 ++ core/common/src/format/LocalDateFormat.kt | 6 ++++++ core/common/src/internal/format/parser/ParserOperation.kt | 1 + 3 files changed, 9 insertions(+) diff --git a/core/common/src/format/DateTimeFormatBuilder.kt b/core/common/src/format/DateTimeFormatBuilder.kt index 328ecdbfa..a235b7ded 100644 --- a/core/common/src/format/DateTimeFormatBuilder.kt +++ b/core/common/src/format/DateTimeFormatBuilder.kt @@ -129,6 +129,8 @@ public sealed interface DateTimeFormatBuilder { * * [am] is used for the AM marker (0-11 hours), [pm] is used for the PM marker (12-23 hours). * + * Empty strings can not be used as markers. + * * @see [amPmHour] */ public fun amPmMarker(am: String, pm: String) diff --git a/core/common/src/format/LocalDateFormat.kt b/core/common/src/format/LocalDateFormat.kt index ccc39d0cd..165f9ac2e 100644 --- a/core/common/src/format/LocalDateFormat.kt +++ b/core/common/src/format/LocalDateFormat.kt @@ -12,6 +12,8 @@ import kotlinx.datetime.internal.format.parser.Copyable /** * A description of how month names are formatted. + * + * An empty string can not be a month name. */ public class MonthNames( /** @@ -21,6 +23,7 @@ public class MonthNames( ) { init { require(names.size == 12) { "Month names must contain exactly 12 elements" } + names.forEach { require(it.isNotEmpty()) { "A month name can not be empty" } } } /** @@ -63,6 +66,8 @@ internal fun MonthNames.toKotlinCode(): String = when (this.names) { /** * A description of how day of week names are formatted. + * + * An empty string can not be a day-of-week name. */ public class DayOfWeekNames( /** @@ -72,6 +77,7 @@ public class DayOfWeekNames( ) { init { require(names.size == 7) { "Day of week names must contain exactly 7 elements" } + names.forEach { require(it.isNotEmpty()) { "A month name can not be empty" } } } /** diff --git a/core/common/src/internal/format/parser/ParserOperation.kt b/core/common/src/internal/format/parser/ParserOperation.kt index 275f2495f..02ba4c76c 100644 --- a/core/common/src/internal/format/parser/ParserOperation.kt +++ b/core/common/src/internal/format/parser/ParserOperation.kt @@ -157,6 +157,7 @@ internal class StringSetParserOperation( init { for (string in strings) { + require(string.isNotEmpty()) { "Found an empty string in $whatThisExpects" } var node = trie for (char in string) { val searchResult = node.children.binarySearchBy(char.toString()) { it.first } From b5f40cdd96988b22fdf695cd075d7b581ec223a1 Mon Sep 17 00:00:00 2001 From: Dmitry Khalanskiy Date: Mon, 18 Mar 2024 11:08:00 +0100 Subject: [PATCH 3/3] Document the thrown exceptions better; forbid parsing equal names --- .../src/format/DateTimeFormatBuilder.kt | 1 + core/common/src/format/LocalDateFormat.kt | 22 ++++++++++--- .../internal/format/parser/ParserOperation.kt | 1 + .../common/test/format/LocalDateFormatTest.kt | 32 +++++++++++++++++++ .../common/test/format/LocalTimeFormatTest.kt | 18 +++++++++++ 5 files changed, 70 insertions(+), 4 deletions(-) diff --git a/core/common/src/format/DateTimeFormatBuilder.kt b/core/common/src/format/DateTimeFormatBuilder.kt index a235b7ded..2230051dd 100644 --- a/core/common/src/format/DateTimeFormatBuilder.kt +++ b/core/common/src/format/DateTimeFormatBuilder.kt @@ -130,6 +130,7 @@ public sealed interface DateTimeFormatBuilder { * [am] is used for the AM marker (0-11 hours), [pm] is used for the PM marker (12-23 hours). * * Empty strings can not be used as markers. + * [IllegalArgumentException] is thrown if either [am] or [pm] is empty or if they are equal. * * @see [amPmHour] */ diff --git a/core/common/src/format/LocalDateFormat.kt b/core/common/src/format/LocalDateFormat.kt index 165f9ac2e..21484c70b 100644 --- a/core/common/src/format/LocalDateFormat.kt +++ b/core/common/src/format/LocalDateFormat.kt @@ -13,7 +13,7 @@ import kotlinx.datetime.internal.format.parser.Copyable /** * A description of how month names are formatted. * - * An empty string can not be a month name. + * An [IllegalArgumentException] will be thrown if some month name is empty or there are duplicate names. */ public class MonthNames( /** @@ -23,7 +23,14 @@ public class MonthNames( ) { init { require(names.size == 12) { "Month names must contain exactly 12 elements" } - names.forEach { require(it.isNotEmpty()) { "A month name can not be empty" } } + names.indices.forEach { ix -> + require(names[ix].isNotEmpty()) { "A month name can not be empty" } + for (ix2 in 0 until ix) { + require(names[ix] != names[ix2]) { + "Month names must be unique, but '${names[ix]}' was repeated" + } + } + } } /** @@ -67,7 +74,7 @@ internal fun MonthNames.toKotlinCode(): String = when (this.names) { /** * A description of how day of week names are formatted. * - * An empty string can not be a day-of-week name. + * An [IllegalArgumentException] will be thrown if some day-of-week name is empty or there are duplicate names. */ public class DayOfWeekNames( /** @@ -77,7 +84,14 @@ public class DayOfWeekNames( ) { init { require(names.size == 7) { "Day of week names must contain exactly 7 elements" } - names.forEach { require(it.isNotEmpty()) { "A month name can not be empty" } } + names.indices.forEach { ix -> + require(names[ix].isNotEmpty()) { "A day-of-week name can not be empty" } + for (ix2 in 0 until ix) { + require(names[ix] != names[ix2]) { + "Day-of-week names must be unique, but '${names[ix]}' was repeated" + } + } + } } /** diff --git a/core/common/src/internal/format/parser/ParserOperation.kt b/core/common/src/internal/format/parser/ParserOperation.kt index 02ba4c76c..6ac3a1c2c 100644 --- a/core/common/src/internal/format/parser/ParserOperation.kt +++ b/core/common/src/internal/format/parser/ParserOperation.kt @@ -167,6 +167,7 @@ internal class StringSetParserOperation( node.children[searchResult].second } } + require(!node.isTerminal) { "The string '$string' was passed several times" } node.isTerminal = true } fun reduceTrie(trie: TrieNode) { diff --git a/core/common/test/format/LocalDateFormatTest.kt b/core/common/test/format/LocalDateFormatTest.kt index 88758b99c..d1363253b 100644 --- a/core/common/test/format/LocalDateFormatTest.kt +++ b/core/common/test/format/LocalDateFormatTest.kt @@ -221,6 +221,38 @@ class LocalDateFormatTest { assertEquals("2020 Jan 05", format.format(LocalDate(2020, 1, 5))) } + @Test + fun testEmptyMonthNames() { + val names = MonthNames.ENGLISH_FULL.names + for (i in 0 until 12) { + val newNames = (0 until 12).map { if (it == i) "" else names[it] } + assertFailsWith { MonthNames(newNames) } + } + } + + @Test + fun testEmptyDayOfWeekNames() { + val names = DayOfWeekNames.ENGLISH_FULL.names + for (i in 0 until 7) { + val newNames = (0 until 7).map { if (it == i) "" else names[it] } + assertFailsWith { DayOfWeekNames(newNames) } + } + } + + @Test + fun testIdenticalMonthNames() { + assertFailsWith { + MonthNames("Jan", "Jan", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec") + } + } + + @Test + fun testIdenticalDayOfWeekNames() { + assertFailsWith { + DayOfWeekNames("Mon", "Tue", "Tue", "Thu", "Fri", "Sat", "Sun") + } + } + private fun test(strings: Map>>, format: DateTimeFormat) { for ((date, stringsForDate) in strings) { val (canonicalString, otherStrings) = stringsForDate diff --git a/core/common/test/format/LocalTimeFormatTest.kt b/core/common/test/format/LocalTimeFormatTest.kt index 595bcc527..80934fc39 100644 --- a/core/common/test/format/LocalTimeFormatTest.kt +++ b/core/common/test/format/LocalTimeFormatTest.kt @@ -216,6 +216,24 @@ class LocalTimeFormatTest { } } + @Test + fun testEmptyAmPmMarkers() { + assertFailsWith { + LocalTime.Format { + amPmMarker("", "pm") + } + } + } + + @Test + fun testIdenticalAmPmMarkers() { + assertFailsWith { + LocalTime.Format { + amPmMarker("pm", "pm") + } + } + } + private fun test(strings: Map>>, format: DateTimeFormat) { for ((date, stringsForDate) in strings) { val (canonicalString, otherStrings) = stringsForDate