diff --git a/src/builtins/core/date.rs b/src/builtins/core/date.rs index 64e1daaca..bb1c24caa 100644 --- a/src/builtins/core/date.rs +++ b/src/builtins/core/date.rs @@ -10,7 +10,7 @@ use crate::{ ArithmeticOverflow, DifferenceOperation, DifferenceSettings, Disambiguation, DisplayCalendar, ResolvedRoundingOptions, Unit, UnitGroup, }, - parsers::{parse_date_time, IxdtfStringBuilder}, + parsers::{parse_date_time, IxdtfStringBuilder, TemporalParser}, provider::{NeverProvider, TimeZoneProvider}, MonthCode, TemporalError, TemporalResult, TemporalUnwrap, TimeZone, }; @@ -490,6 +490,25 @@ impl PlainDate { Self::try_new(date.year, date.month, date.day, calendar) } + /// Converts a UTF-16 encoded string into a `PlainDate`. + pub fn from_utf16(s: &[u16]) -> TemporalResult { + let parser = TemporalParser::from_utf16(s); + let parsed = parser.parse_date_time()?; + + let calendar = if let Some(cal_bytes) = parsed.calendar { + Calendar::try_from_utf8(&cal_bytes)? + } else { + Calendar::default() + }; + + Self::try_new( + parsed.iso.date.year, + parsed.iso.date.month, + parsed.iso.date.day, + calendar, + ) + } + /// Creates a date time with values from a `PartialDate`. 
pub fn with( &self, diff --git a/src/builtins/core/datetime.rs b/src/builtins/core/datetime.rs index 55e75018f..0eaa4ce16 100644 --- a/src/builtins/core/datetime.rs +++ b/src/builtins/core/datetime.rs @@ -12,10 +12,10 @@ use crate::{ DisplayCalendar, ResolvedRoundingOptions, RoundingOptions, ToStringRoundingOptions, Unit, UnitGroup, }, - parsers::{parse_date_time, IxdtfStringBuilder}, + parsers::{IxdtfStringBuilder, TemporalParser}, primitive::FiniteF64, provider::{NeverProvider, TimeZoneProvider}, - temporal_assert, MonthCode, TemporalError, TemporalResult, TemporalUnwrap, TimeZone, + temporal_assert, MonthCode, TemporalError, TemporalResult, TimeZone, }; use alloc::string::String; use core::{cmp::Ordering, str::FromStr}; @@ -549,30 +549,30 @@ impl PlainDateTime { // Converts a UTF-8 encoded string into a `PlainDateTime`. pub fn from_utf8(s: &[u8]) -> TemporalResult { - let parse_record = parse_date_time(s)?; - - let calendar = parse_record - .calendar - .map(Calendar::try_from_utf8) - .transpose()? - .unwrap_or_default(); - - let time = parse_record - .time - .map(IsoTime::from_time_record) - .transpose()? - .unwrap_or_default(); - - let parsed_date = parse_record.date.temporal_unwrap()?; - - let date = IsoDate::new_with_overflow( - parsed_date.year, - parsed_date.month, - parsed_date.day, - ArithmeticOverflow::Reject, - )?; + let parser = TemporalParser::from_utf8(s); + let parsed = parser.parse_date_time()?; + + let calendar = if let Some(cal_bytes) = parsed.calendar { + Calendar::try_from_utf8(&cal_bytes)? + } else { + Calendar::default() + }; + + Ok(Self::new_unchecked(parsed.iso, calendar)) + } + + /// Converts a UTF-16 encoded string into a `PlainDateTime`. + pub fn from_utf16(s: &[u16]) -> TemporalResult { + let parser = TemporalParser::from_utf16(s); + let parsed = parser.parse_date_time()?; + + let calendar = if let Some(cal_bytes) = parsed.calendar { + Calendar::try_from_utf8(&cal_bytes)? 
+ } else { + Calendar::default() + }; - Ok(Self::new_unchecked(IsoDateTime::new(date, time)?, calendar)) + Ok(Self::new_unchecked(parsed.iso, calendar)) } /// Creates a new `DateTime` with the fields of a `PartialDateTime`. @@ -1530,4 +1530,136 @@ mod tests { "pads 4 decimal places to 9" ); } + + #[test] + fn test_utf16_datetime_parsing() { + use alloc::vec::Vec; + + let datetime_str = "2023-05-15T14:30:45.123"; + let datetime_utf16: Vec = datetime_str.encode_utf16().collect(); + + // Test UTF-16 parsing + let datetime_utf16_result = PlainDateTime::from_utf16(&datetime_utf16).unwrap(); + + // Test UTF-8 parsing for comparison + let datetime_utf8_result = PlainDateTime::from_utf8(datetime_str.as_bytes()).unwrap(); + + // Compare results + assert_eq!(datetime_utf16_result.year(), datetime_utf8_result.year()); + assert_eq!(datetime_utf16_result.month(), datetime_utf8_result.month()); + assert_eq!(datetime_utf16_result.day(), datetime_utf8_result.day()); + assert_eq!(datetime_utf16_result.hour(), datetime_utf8_result.hour()); + assert_eq!( + datetime_utf16_result.minute(), + datetime_utf8_result.minute() + ); + assert_eq!( + datetime_utf16_result.second(), + datetime_utf8_result.second() + ); + assert_eq!( + datetime_utf16_result.millisecond(), + datetime_utf8_result.millisecond() + ); + + // Test specific values + assert_eq!(datetime_utf16_result.year(), 2023); + assert_eq!(datetime_utf16_result.month(), 5); + assert_eq!(datetime_utf16_result.day(), 15); + assert_eq!(datetime_utf16_result.hour(), 14); + assert_eq!(datetime_utf16_result.minute(), 30); + assert_eq!(datetime_utf16_result.second(), 45); + assert_eq!(datetime_utf16_result.millisecond(), 123); + } + + #[test] + fn test_temporal_parser_from_str_as_utf8() { + use crate::parsers::TemporalParser; + + let datetime_str = "2023-05-15T14:30:45.123"; + let parser = TemporalParser::from_str_as_utf8(datetime_str); + + // Test that the parser works correctly with the renamed method + let parsed = 
parser.parse_date_time().unwrap(); + + assert_eq!(parsed.iso.date.year, 2023); + assert_eq!(parsed.iso.date.month, 5); + assert_eq!(parsed.iso.date.day, 15); + assert_eq!(parsed.iso.time.hour, 14); + assert_eq!(parsed.iso.time.minute, 30); + assert_eq!(parsed.iso.time.second, 45); + assert_eq!(parsed.iso.time.millisecond, 123); + } + + #[test] + fn test_all_temporal_types_utf16_support() { + use crate::{Instant, PlainDate, PlainMonthDay, PlainTime, PlainYearMonth}; + use alloc::vec::Vec; + + // Test all temporal types have consistent UTF-16 support + let datetime_str = "2023-05-15T14:30:45.123"; + let datetime_utf16: Vec = datetime_str.encode_utf16().collect(); + + let time_str = "14:30:45.123"; + let time_utf16: Vec = time_str.encode_utf16().collect(); + + let date_str = "2023-05-15T00:00:00"; + let date_utf16: Vec = date_str.encode_utf16().collect(); + + let year_month_str = "2023-05"; + let year_month_utf16: Vec = year_month_str.encode_utf16().collect(); + + let month_day_str = "05-15"; + let month_day_utf16: Vec = month_day_str.encode_utf16().collect(); + + let instant_str = "2023-05-15T14:30:45.123Z"; + let instant_utf16: Vec = instant_str.encode_utf16().collect(); + + // Test that all types can parse UTF-16 + let datetime = PlainDateTime::from_utf16(&datetime_utf16).unwrap(); + assert_eq!(datetime.year(), 2023); + assert_eq!(datetime.month(), 5); + assert_eq!(datetime.day(), 15); + assert_eq!(datetime.hour(), 14); + assert_eq!(datetime.minute(), 30); + assert_eq!(datetime.second(), 45); + assert_eq!(datetime.millisecond(), 123); + + let time = PlainTime::from_utf16(&time_utf16).unwrap(); + assert_eq!(time.hour(), 14); + assert_eq!(time.minute(), 30); + assert_eq!(time.second(), 45); + assert_eq!(time.millisecond(), 123); + + let date = PlainDate::from_utf16(&date_utf16).unwrap(); + assert_eq!(date.year(), 2023); + assert_eq!(date.month(), 5); + assert_eq!(date.day(), 15); + + let year_month = PlainYearMonth::from_utf16(&year_month_utf16).unwrap(); + 
assert_eq!(year_month.year(), 2023); + assert_eq!(year_month.month(), 5); + + let month_day = PlainMonthDay::from_utf16(&month_day_utf16).unwrap(); + assert_eq!(month_day.iso_month(), 5); + assert_eq!(month_day.day(), 15); + + let instant = Instant::from_utf16(&instant_utf16).unwrap(); + assert_eq!(instant.epoch_milliseconds(), 1684161045123); + + // Test UTF-16 vs UTF-8 equivalence + let datetime_utf8 = PlainDateTime::from_utf8(datetime_str.as_bytes()).unwrap(); + let time_utf8 = PlainTime::from_utf8(time_str.as_bytes()).unwrap(); + let date_utf8 = PlainDate::from_utf8(date_str.as_bytes()).unwrap(); + let year_month_utf8 = PlainYearMonth::from_utf8(year_month_str.as_bytes()).unwrap(); + let month_day_utf8 = PlainMonthDay::from_utf8(month_day_str.as_bytes()).unwrap(); + let instant_utf8 = Instant::from_utf8(instant_str.as_bytes()).unwrap(); + + assert_eq!(datetime, datetime_utf8); + assert_eq!(time, time_utf8); + assert_eq!(date, date_utf8); + assert_eq!(year_month, year_month_utf8); + assert_eq!(month_day, month_day_utf8); + assert_eq!(instant, instant_utf8); + } } diff --git a/src/builtins/core/instant.rs b/src/builtins/core/instant.rs index 0b61cbf48..416c72d4d 100644 --- a/src/builtins/core/instant.rs +++ b/src/builtins/core/instant.rs @@ -12,7 +12,7 @@ use crate::{ DifferenceOperation, DifferenceSettings, DisplayOffset, ResolvedRoundingOptions, RoundingOptions, ToStringRoundingOptions, Unit, UnitGroup, }, - parsers::{parse_instant, IxdtfStringBuilder}, + parsers::{IxdtfStringBuilder, TemporalParser}, provider::TimeZoneProvider, rounding::{IncrementRounder, Round}, unix_time::EpochNanoseconds, @@ -278,10 +278,11 @@ impl Instant { // Converts a UTF-8 encoded string into a `Instant`. 
pub fn from_utf8(s: &[u8]) -> TemporalResult { - let ixdtf_record = parse_instant(s)?; + let parser = TemporalParser::from_utf8(s); + let parsed = parser.parse_instant()?; // Find the offset - let ns_offset = match ixdtf_record.offset { + let ns_offset = match parsed.offset { UtcOffsetRecordOrZ::Offset(offset) => { let ns = offset .fraction() @@ -296,21 +297,63 @@ impl Instant { UtcOffsetRecordOrZ::Z => 0, }; - let time_nanoseconds = ixdtf_record - .time - .fraction - .and_then(|x| x.to_nanoseconds()) - .unwrap_or(0); + let time_nanoseconds = parsed.iso.time.millisecond as u32 * 1_000_000 + + parsed.iso.time.microsecond as u32 * 1_000 + + parsed.iso.time.nanosecond as u32; let (millisecond, rem) = time_nanoseconds.div_rem_euclid(&1_000_000); let (microsecond, nanosecond) = rem.div_rem_euclid(&1_000); let balanced = IsoDateTime::balance( - ixdtf_record.date.year, - ixdtf_record.date.month.into(), - ixdtf_record.date.day.into(), - ixdtf_record.time.hour.into(), - ixdtf_record.time.minute.into(), - ixdtf_record.time.second.clamp(0, 59).into(), + parsed.iso.date.year, + parsed.iso.date.month.into(), + parsed.iso.date.day.into(), + parsed.iso.time.hour.into(), + parsed.iso.time.minute.into(), + parsed.iso.time.second.clamp(0, 59).into(), + millisecond.into(), + microsecond.into(), + i128::from(nanosecond) - i128::from(ns_offset), + ); + + let nanoseconds = balanced.as_nanoseconds()?; + + Ok(Self(nanoseconds)) + } + + /// Converts a UTF-16 encoded string into a `Instant`. 
+ pub fn from_utf16(s: &[u16]) -> TemporalResult { + let parser = TemporalParser::from_utf16(s); + let parsed = parser.parse_instant()?; + + // Find the offset + let ns_offset = match parsed.offset { + UtcOffsetRecordOrZ::Offset(offset) => { + let ns = offset + .fraction() + .and_then(|x| x.to_nanoseconds()) + .unwrap_or(0); + (offset.hour() as i64 * NANOSECONDS_PER_HOUR + + i64::from(offset.minute()) * NANOSECONDS_PER_MINUTE + + i64::from(offset.second().unwrap_or(0)) * NANOSECONDS_PER_SECOND + + i64::from(ns)) + * offset.sign() as i64 + } + UtcOffsetRecordOrZ::Z => 0, + }; + + let time_nanoseconds = parsed.iso.time.millisecond as u32 * 1_000_000 + + parsed.iso.time.microsecond as u32 * 1_000 + + parsed.iso.time.nanosecond as u32; + let (millisecond, rem) = time_nanoseconds.div_rem_euclid(&1_000_000); + let (microsecond, nanosecond) = rem.div_rem_euclid(&1_000); + + let balanced = IsoDateTime::balance( + parsed.iso.date.year, + parsed.iso.date.month.into(), + parsed.iso.date.day.into(), + parsed.iso.time.hour.into(), + parsed.iso.time.minute.into(), + parsed.iso.time.second.clamp(0, 59).into(), millisecond.into(), microsecond.into(), i128::from(nanosecond) - i128::from(ns_offset), diff --git a/src/builtins/core/month_day.rs b/src/builtins/core/month_day.rs index 57fe742d2..0060b3974 100644 --- a/src/builtins/core/month_day.rs +++ b/src/builtins/core/month_day.rs @@ -6,8 +6,8 @@ use core::str::FromStr; use crate::{ iso::IsoDate, options::{ArithmeticOverflow, DisplayCalendar}, - parsers::{FormattableCalendar, FormattableDate, FormattableMonthDay}, - Calendar, MonthCode, TemporalError, TemporalResult, TemporalUnwrap, + parsers::{FormattableCalendar, FormattableDate, FormattableMonthDay, TemporalParser}, + Calendar, MonthCode, TemporalError, TemporalResult, }; use super::{calendar::month_to_month_code, PartialDate, PlainDate}; @@ -189,13 +189,14 @@ impl PlainMonthDay { // Converts a UTF-8 encoded string into a `PlainMonthDay`. 
pub fn from_utf8(s: &[u8]) -> TemporalResult { - let record = crate::parsers::parse_month_day(s)?; + let parser = TemporalParser::from_utf8(s); + let parsed = parser.parse_month_day()?; - let calendar = record - .calendar - .map(Calendar::try_from_utf8) - .transpose()? - .unwrap_or_default(); + let calendar = if let Some(cal_bytes) = parsed.calendar { + Calendar::try_from_utf8(&cal_bytes)? + } else { + Calendar::default() + }; // ParseISODateTime // Step 4.a.ii.3 @@ -206,13 +207,38 @@ impl PlainMonthDay { return Err(TemporalError::range().with_message("non-ISO calendar not supported.")); } - let date = record.date; + Self::new_with_overflow( + parsed.iso.month, + parsed.iso.day, + calendar, + ArithmeticOverflow::Reject, + None, + ) + } + + /// Converts a UTF-16 encoded string into a `PlainMonthDay`. + pub fn from_utf16(s: &[u16]) -> TemporalResult { + let parser = TemporalParser::from_utf16(s); + let parsed = parser.parse_month_day()?; + + let calendar = if let Some(cal_bytes) = parsed.calendar { + Calendar::try_from_utf8(&cal_bytes)? + } else { + Calendar::default() + }; - let date = date.temporal_unwrap()?; + // ParseISODateTime + // Step 4.a.ii.3 + // If goal is TemporalMonthDayString or TemporalYearMonthString, calendar is + // not empty, and the ASCII-lowercase of calendar is not "iso8601", throw a + // RangeError exception. 
+ if !calendar.is_iso() { + return Err(TemporalError::range().with_message("non-ISO calendar not supported.")); + } Self::new_with_overflow( - date.month, - date.day, + parsed.iso.month, + parsed.iso.day, calendar, ArithmeticOverflow::Reject, None, diff --git a/src/builtins/core/time.rs b/src/builtins/core/time.rs index 01bdb11bb..3678f4b4e 100644 --- a/src/builtins/core/time.rs +++ b/src/builtins/core/time.rs @@ -7,7 +7,7 @@ use crate::{ ArithmeticOverflow, DifferenceOperation, DifferenceSettings, ResolvedRoundingOptions, RoundingIncrement, RoundingMode, ToStringRoundingOptions, Unit, UnitGroup, }, - parsers::{parse_time, IxdtfStringBuilder}, + parsers::{IxdtfStringBuilder, TemporalParser}, TemporalError, TemporalResult, }; use alloc::string::String; @@ -418,9 +418,16 @@ impl PlainTime { // Converts a UTF-8 encoded string into a `PlainTime`. pub fn from_utf8(s: &[u8]) -> TemporalResult { - let result = parse_time(s)?; - let iso = IsoTime::from_time_record(result)?; - Ok(Self::new_unchecked(iso)) + let parser = TemporalParser::from_utf8(s); + let parsed = parser.parse_time()?; + Ok(Self::new_unchecked(parsed.iso)) + } + + /// Converts a UTF-16 encoded string into a `PlainTime`. + pub fn from_utf16(s: &[u16]) -> TemporalResult { + let parser = TemporalParser::from_utf16(s); + let parsed = parser.parse_time()?; + Ok(Self::new_unchecked(parsed.iso)) } /// Creates a new `PlainTime` using the current `PlainTime` fields as a fallback. 
diff --git a/src/builtins/core/year_month.rs b/src/builtins/core/year_month.rs index eaebc8f11..51f3b5956 100644 --- a/src/builtins/core/year_month.rs +++ b/src/builtins/core/year_month.rs @@ -11,7 +11,7 @@ use crate::{ ArithmeticOverflow, DifferenceOperation, DifferenceSettings, DisplayCalendar, ResolvedRoundingOptions, RoundingIncrement, Unit, UnitGroup, }, - parsers::{FormattableCalendar, FormattableDate, FormattableYearMonth}, + parsers::{FormattableCalendar, FormattableDate, FormattableYearMonth, TemporalParser}, provider::NeverProvider, temporal_assert, utils::pad_iso_year, @@ -525,12 +525,14 @@ impl PlainYearMonth { // Converts a UTF-8 encoded string into a `PlainYearMonth`. pub fn from_utf8(s: &[u8]) -> TemporalResult { - let record = crate::parsers::parse_year_month(s)?; - let calendar = record - .calendar - .map(Calendar::try_from_utf8) - .transpose()? - .unwrap_or_default(); + let parser = TemporalParser::from_utf8(s); + let parsed = parser.parse_year_month()?; + + let calendar = if let Some(cal_bytes) = parsed.calendar { + Calendar::try_from_utf8(&cal_bytes)? + } else { + Calendar::default() + }; // ParseISODateTime // Step 4.a.ii.3 @@ -541,11 +543,49 @@ impl PlainYearMonth { return Err(TemporalError::range().with_message("non-ISO calendar not supported.")); } - let date = record.date.temporal_unwrap()?; + // The below steps are from `ToTemporalYearMonth` + // 10. Let isoDate be CreateISODateRecord(result.[[Year]], result.[[Month]], result.[[Day]]). + let iso = parsed.iso; + + // 11. If ISOYearMonthWithinLimits(isoDate) is false, throw a RangeError exception. + if !year_month_within_limits(iso.year, iso.month) { + return Err(TemporalError::range().with_message("Exceeded valid range.")); + } + + let intermediate = Self::new_unchecked(iso, calendar); + // 12. Set result to ISODateToFields(calendar, isoDate, year-month). + let partial = PartialYearMonth::try_from_year_month(&intermediate)?; + // 13. 
NOTE: The following operation is called with constrain regardless of the + // value of overflow, in order for the calendar to store a canonical value in the + // [[Day]] field of the [[ISODate]] internal slot of the result. + // 14. Set isoDate to ? CalendarYearMonthFromFields(calendar, result, constrain). + // 15. Return ! CreateTemporalYearMonth(isoDate, calendar). + PlainYearMonth::from_partial(partial, ArithmeticOverflow::Constrain) + } + + /// Converts a UTF-16 encoded string into a `PlainYearMonth`. + pub fn from_utf16(s: &[u16]) -> TemporalResult { + let parser = TemporalParser::from_utf16(s); + let parsed = parser.parse_year_month()?; + + let calendar = if let Some(cal_bytes) = parsed.calendar { + Calendar::try_from_utf8(&cal_bytes)? + } else { + Calendar::default() + }; + + // ParseISODateTime + // Step 4.a.ii.3 + // If goal is TemporalMonthDayString or TemporalYearMonthString, calendar is + // not empty, and the ASCII-lowercase of calendar is not "iso8601", throw a + // RangeError exception. + if !calendar.is_iso() { + return Err(TemporalError::range().with_message("non-ISO calendar not supported.")); + } // The below steps are from `ToTemporalYearMonth` // 10. Let isoDate be CreateISODateRecord(result.[[Year]], result.[[Month]], result.[[Day]]). - let iso = IsoDate::new_unchecked(date.year, date.month, date.day); + let iso = parsed.iso; // 11. If ISOYearMonthWithinLimits(isoDate) is false, throw a RangeError exception. 
if !year_month_within_limits(iso.year, iso.month) { diff --git a/src/iso.rs b/src/iso.rs index fdda011f3..f24c3ccb3 100644 --- a/src/iso.rs +++ b/src/iso.rs @@ -44,6 +44,22 @@ use crate::{ use icu_calendar::{Date as IcuDate, Iso}; use num_traits::{cast::FromPrimitive, Euclid}; +// ISO/Temporal specification limits +// +// Year limits are defined by the ECMAScript Temporal specification: +// https://tc39.es/proposal-temporal/#sec-temporal-date-objects +// These limits ensure compatibility with ISO 8601 extended year format +// and avoid issues with JavaScript's Date object limitations. +// +// Time component limits follow the ISO 8601 standard: +// https://www.iso.org/iso-8601-date-and-time-format.html +// See also RFC 3339: https://tools.ietf.org/html/rfc3339 + +/// Minimum supported year (ECMAScript's minimum time value is -271821-04-20T00:00:00Z; Temporal dates extend one day earlier, to -271821-04-19) +pub(crate) const MIN_ISO_YEAR: i32 = -271821; +/// Maximum supported year (275760-09-13T00:00:00Z corresponds to ECMAScript's maximum time value) +pub(crate) const MAX_ISO_YEAR: i32 = 275760; + /// `IsoDateTime` is the record of the `IsoDate` and `IsoTime` internal slots. #[non_exhaustive] #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] diff --git a/src/lib.rs b/src/lib.rs index 92e34ef1a..be66b20e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,6 +48,42 @@ //! //! ``` //! +//! ### Parse timestamps with the public `TemporalParser` API +//! +//! The `TemporalParser` provides a high-level, public API for parsing IXDTF strings +//! with built-in validation and invariant checking. +//! +//! ```rust +//! use temporal_rs::parsers::TemporalParser; +//! +//! let parser = TemporalParser::from_utf8(b"2025-01-15T14:30:00"); +//! +//! // Parse a PlainDateTime with validation +//! let dt_result = parser.parse_date_time(); +//! assert!(dt_result.is_ok()); +//! let parsed = dt_result.unwrap(); +//! assert_eq!(parsed.iso.date.year, 2025); +//! assert_eq!(parsed.iso.time.hour, 14); +//! 
// Calendar will be None since our test string doesn't include a calendar annotation +//! +//! // Parse an Instant +//! let instant_parser = TemporalParser::from_utf8(b"2025-01-15T14:30:00Z"); +//! let instant_result = instant_parser.parse_instant(); +//! assert!(instant_result.is_ok()); +//! +//! // Parse a ZonedDateTime +//! let zdt_parser = TemporalParser::from_utf8(b"2025-01-15T14:30:00Z[America/New_York]"); +//! let zdt_result = zdt_parser.parse_zoned_date_time(); +//! assert!(zdt_result.is_ok()); +//! let zdt_parsed = zdt_result.unwrap(); +//! assert_eq!(zdt_parsed.timezone(), "America/New_York"); +//! +//! // Invalid dates are properly rejected +//! let invalid_parser = TemporalParser::from_utf8(b"2025-02-30T14:30:00"); // Feb 30th doesn't exist +//! let invalid_result = invalid_parser.parse_date_time(); +//! assert!(invalid_result.is_err()); +//! ``` +//! //! ### Create a `ZonedDateTime` for a RFC9557 IXDTF string. //! //! **Important Note:** The below API is enabled with the diff --git a/src/parsers.rs b/src/parsers.rs index ed2e7c40b..946d84810 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -1,15 +1,22 @@ //! This module implements Temporal Date/Time parsing functionality. 
- use crate::{ - iso::{IsoDate, IsoTime}, + iso::{year_month_within_limits, IsoDate, IsoDateTime, IsoTime, MAX_ISO_YEAR, MIN_ISO_YEAR}, options::{DisplayCalendar, DisplayOffset, DisplayTimeZone}, Sign, TemporalError, TemporalResult, }; -use alloc::format; +use alloc::{ + borrow::Cow, + format, + string::{String, ToString}, + vec::Vec, +}; +use ixdtf::ParseError; use ixdtf::{ - encoding::Utf8, + encoding::{Utf16, Utf8}, parsers::IxdtfParser, - records::{Annotation, DateRecord, IxdtfParseRecord, TimeRecord, UtcOffsetRecordOrZ}, + records::{ + Annotation, DateRecord, IxdtfParseRecord, TimeRecord, TimeZoneRecord, UtcOffsetRecordOrZ, + }, }; use writeable::{impl_display_with_writeable, LengthHint, Writeable}; @@ -17,6 +24,761 @@ mod timezone; pub(crate) use timezone::{parse_allowed_timezone_formats, parse_identifier}; +/// Validation errors specific to temporal parsing +#[derive(Debug, Clone)] +pub enum TemporalValidationError { + /// Year is outside the valid temporal range + InvalidYear(i32), + /// Combined date/time is outside representable range + DateTimeOutOfRange, + /// Parsing error from ixdtf + ParseError(String), +} + +impl TemporalValidationError { + /// Convert to a TemporalError with appropriate message + pub fn into_temporal_error(self) -> TemporalError { + match self { + Self::InvalidYear(year) => TemporalError::range().with_message(format!( + "Year {year} is outside valid range ({MIN_ISO_YEAR} to {MAX_ISO_YEAR})" + )), + Self::DateTimeOutOfRange => { + TemporalError::range().with_message("Date/time is outside representable range") + } + Self::ParseError(msg) => TemporalError::syntax().with_message(msg), + } + } +} + +/// Maps ixdtf ParseError to TemporalValidationError +fn map_parse_error(err: ParseError) -> TemporalValidationError { + use ParseError::*; + let message = match err { + InvalidMonthRange => "Month is outside valid range (1-12)".to_string(), + InvalidDayRange => "Day is outside valid range for the given month/year".to_string(), + DateYear => 
"Invalid year format".to_string(), + DateMonth => "Invalid month format".to_string(), + DateDay => "Invalid day format".to_string(), + TimeHour => "Invalid hour format".to_string(), + TimeMinuteSecond => "Invalid minute or second format".to_string(), + TimeSecond => "Invalid second format".to_string(), + FractionPart => "Invalid fractional seconds format".to_string(), + ParseFloat => "Invalid fractional seconds value".to_string(), + AbruptEnd { location } => format!("Unexpected end while parsing {location}"), + InvalidEnd => "Unexpected character at end of input".to_string(), + _ => format!("Parse error: {err:?}"), + }; + TemporalValidationError::ParseError(message) +} + +// ECMAScript Temporal specific validation +/// Validates a date record for ECMAScript Temporal year limits +fn validate_date_record_impl(record: DateRecord) -> Result { + // Only validate ECMAScript Temporal year limits + if !year_month_within_limits(record.year, record.month) { + return Err(TemporalValidationError::InvalidYear(record.year)); + } + + Ok(IsoDate::new_unchecked( + record.year, + record.month, + record.day, + )) +} + +/// Creates an IsoTime from a time record +fn validate_time_record_impl(record: TimeRecord) -> Result { + // ixdtf validates time components + IsoTime::from_time_record(record) + .map_err(|_| TemporalValidationError::ParseError("Invalid time components".to_string())) +} + +/// Parser encoding enum that specifies how temporal strings are encoded. +#[derive(Debug)] +pub enum ParserEncoding<'a> { + Utf8(&'a [u8]), + Utf16(&'a [u16]), +} + +/// Public parser that wraps `IxdtfParser` and enforces Temporal parsing requirements. +#[derive(Debug)] +pub struct TemporalParser<'a> { + encoding: ParserEncoding<'a>, +} + +impl<'a> TemporalParser<'a> { + /// Creates a new `TemporalParser` from UTF-8 bytes. 
+ #[inline] + pub const fn from_utf8(source: &'a [u8]) -> Self { + Self { + encoding: ParserEncoding::Utf8(source), + } + } + + /// Creates a new `TemporalParser` from UTF-16 code units. + #[inline] + pub const fn from_utf16(source: &'a [u16]) -> Self { + Self { + encoding: ParserEncoding::Utf16(source), + } + } + + /// Creates a new `TemporalParser` from a string slice by converting to UTF-8 bytes. + #[inline] + pub fn from_str_as_utf8(source: &'a str) -> Self { + Self::from_utf8(source.as_bytes()) + } + + /// Parses the source into a `PlainDateTime` compatible record. + pub fn parse_date_time(&self) -> TemporalResult> { + match &self.encoding { + ParserEncoding::Utf8(source) => { + let record = parse_date_time(source)?; + self.validate_and_build_date_time(record) + } + ParserEncoding::Utf16(source) => { + let record = self.parse_date_time_utf16(source)?; + self.validate_and_build_date_time_utf16(record) + } + } + } + + /// Parses the source into a `ZonedDateTime` compatible record. + pub fn parse_zoned_date_time(&self) -> TemporalResult> { + match &self.encoding { + ParserEncoding::Utf8(source) => { + let source_str = core::str::from_utf8(source) + .map_err(|_| TemporalError::syntax().with_message("Invalid UTF-8 in source"))?; + let record = parse_zoned_date_time(source_str)?; + self.validate_and_build_zoned_date_time(record) + } + ParserEncoding::Utf16(source) => { + let record = self.parse_zoned_date_time_utf16(source)?; + self.validate_and_build_zoned_date_time_utf16(record) + } + } + } + + /// Parses the source into an `Instant` compatible record. + pub fn parse_instant(&self) -> TemporalResult { + let record = match &self.encoding { + ParserEncoding::Utf8(source) => parse_instant(source)?, + ParserEncoding::Utf16(source) => self.parse_instant_utf16(source)?, + }; + self.validate_and_build_instant(record) + } + + /// Parses the source into a `PlainTime` compatible record. 
+ pub fn parse_time(&self) -> TemporalResult { + let record = match &self.encoding { + ParserEncoding::Utf8(source) => parse_time(source)?, + ParserEncoding::Utf16(source) => self.parse_time_utf16(source)?, + }; + self.validate_and_build_time(record) + } + + /// Parses the source into a `PlainYearMonth` compatible record. + pub fn parse_year_month(&self) -> TemporalResult> { + match &self.encoding { + ParserEncoding::Utf8(source) => { + let record = parse_year_month(source)?; + self.validate_and_build_year_month(record) + } + ParserEncoding::Utf16(source) => { + let record = self.parse_year_month_utf16(source)?; + self.validate_and_build_year_month_utf16(record) + } + } + } + + /// Parses the source into a `PlainMonthDay` compatible record. + pub fn parse_month_day(&self) -> TemporalResult> { + match &self.encoding { + ParserEncoding::Utf8(source) => { + let record = parse_month_day(source)?; + self.validate_and_build_month_day(record) + } + ParserEncoding::Utf16(source) => { + let record = self.parse_month_day_utf16(source)?; + self.validate_and_build_month_day_utf16(record) + } + } + } + + // Private UTF-16 parsing methods + + fn parse_date_time_utf16( + &self, + source: &'a [u16], + ) -> TemporalResult> { + let record = self.parse_ixdtf_utf16(source, ParseVariant::DateTime)?; + + if record.offset == Some(UtcOffsetRecordOrZ::Z) { + return Err(TemporalError::range() + .with_message("UTC designator is not valid for DateTime parsing.")); + } + + if let Some(date_record) = record.date { + validate_date_record_impl(date_record).map_err(|e| e.into_temporal_error())?; + } + if let Some(time_record) = record.time { + validate_time_record_impl(time_record).map_err(|e| e.into_temporal_error())?; + } + + Ok(record) + } + + fn parse_zoned_date_time_utf16( + &self, + source: &'a [u16], + ) -> TemporalResult> { + let record = self.parse_ixdtf_utf16(source, ParseVariant::DateTime)?; + + if record.tz.is_none() { + return Err(TemporalError::range() + .with_message("Time zone 
annotation is required for parsing a zoned date time.")); + } + + if let Some(date_record) = record.date { + validate_date_record_impl(date_record).map_err(|e| e.into_temporal_error())?; + } + if let Some(time_record) = record.time { + validate_time_record_impl(time_record).map_err(|e| e.into_temporal_error())?; + } + + Ok(record) + } + + fn parse_instant_utf16(&self, source: &'a [u16]) -> TemporalResult { + let record = self.parse_ixdtf_utf16(source, ParseVariant::DateTime)?; + + let IxdtfParseRecord { + date: Some(date), + time: Some(time), + offset: Some(offset), + .. + } = record + else { + return Err( + TemporalError::range().with_message("Required fields missing from Instant string.") + ); + }; + + validate_date_record_impl(date).map_err(|e| e.into_temporal_error())?; + validate_time_record_impl(time).map_err(|e| e.into_temporal_error())?; + + Ok(IxdtfParseInstantRecord { date, time, offset }) + } + + fn parse_time_utf16(&self, source: &'a [u16]) -> TemporalResult { + let time_record = self.parse_ixdtf_utf16(source, ParseVariant::Time); + + let Err(ref e) = time_record else { + return time_record.and_then(|record| self.check_time_record_utf16(record)); + }; + + let dt_parse = self.parse_date_time_utf16(source); + + match dt_parse { + Ok(dt) => self.check_time_record_utf16(dt), + _ => Err(TemporalError::range().with_message(format!("{e}"))), + } + } + + fn parse_year_month_utf16( + &self, + source: &'a [u16], + ) -> TemporalResult> { + let ym_record = self.parse_ixdtf_utf16(source, ParseVariant::YearMonth); + + let Err(ref e) = ym_record else { + return ym_record.and_then(|record| self.check_offset_utf16(record)); + }; + + let dt_parse = self.parse_date_time_utf16(source); + + match dt_parse { + Ok(dt) => self.check_offset_utf16(dt), + _ => Err(TemporalError::range().with_message(format!("{e}"))), + } + } + + fn parse_month_day_utf16( + &self, + source: &'a [u16], + ) -> TemporalResult> { + let md_record = self.parse_ixdtf_utf16(source, 
ParseVariant::MonthDay); + + let Err(ref e) = md_record else { + return md_record.and_then(|record| self.check_offset_utf16(record)); + }; + + let dt_parse = self.parse_date_time_utf16(source); + + match dt_parse { + Ok(dt) => self.check_offset_utf16(dt), + _ => Err(TemporalError::range().with_message(format!("{e}"))), + } + } + + fn parse_ixdtf_utf16( + &self, + source: &'a [u16], + variant: ParseVariant, + ) -> TemporalResult> { + fn cast_handler<'a>( + _: &mut IxdtfParser<'a, Utf16>, + handler: impl FnMut(Annotation<'a, Utf16>) -> Option>, + ) -> impl FnMut(Annotation<'a, Utf16>) -> Option> { + handler + } + + let mut first_calendar: Option> = None; + let mut critical_duplicate_calendar = false; + let mut parser = IxdtfParser::from_utf16(source); + + let handler = cast_handler(&mut parser, |annotation: Annotation| { + if annotation.key == "u-ca".encode_utf16().collect::>().as_slice() { + match first_calendar { + Some(ref cal) => { + if cal.critical || annotation.critical { + critical_duplicate_calendar = true + } + } + None => first_calendar = Some(annotation), + } + return None; + } + Some(annotation) + }); + + let mut record = match variant { + ParseVariant::YearMonth => parser.parse_year_month_with_annotation_handler(handler), + ParseVariant::MonthDay => parser.parse_month_day_with_annotation_handler(handler), + ParseVariant::DateTime => parser.parse_with_annotation_handler(handler), + ParseVariant::Time => parser.parse_time_with_annotation_handler(handler), + } + .map_err(|e| map_parse_error(e).into_temporal_error())?; + + if critical_duplicate_calendar { + return Err(TemporalError::range() + .with_message("Duplicate calendar value with critical flag found.")); + } + + if variant != ParseVariant::Time && record.date.is_none() { + return Err( + TemporalError::range().with_message("DateTime strings must contain a Date value.") + ); + } + + record.calendar = first_calendar.map(|v| v.value); + + Ok(record) + } + + fn check_offset_utf16( + &self, + record: 
IxdtfParseRecord<'a, Utf16>, + ) -> TemporalResult> { + if record.offset == Some(UtcOffsetRecordOrZ::Z) { + return Err(TemporalError::range() + .with_message("UTC designator is not valid for plain date/time parsing.")); + } + Ok(record) + } + + fn check_time_record_utf16( + &self, + record: IxdtfParseRecord<'a, Utf16>, + ) -> TemporalResult { + let record = self.check_offset_utf16(record)?; + let Some(time) = record.time else { + return Err(TemporalError::range() + .with_message("PlainTime can only be parsed from strings with a time component.")); + }; + Ok(time) + } + + // Helper function to convert UTF-16 calendar to a Cow<[u8]> + fn convert_utf16_calendar_to_cow(calendar_utf16: &[u16]) -> TemporalResult> { + let calendar_string = String::from_utf16(calendar_utf16) + .map_err(|_| TemporalError::syntax().with_message("Invalid UTF-16 in calendar"))?; + + Ok(Cow::Owned(calendar_string.into_bytes())) + } + + // Helper function to convert UTF-16 timezone to a Cow<[u8]> + fn convert_utf16_timezone_to_cow(timezone_utf16: &[u16]) -> TemporalResult> { + let timezone_string = String::from_utf16(timezone_utf16) + .map_err(|_| TemporalError::syntax().with_message("Invalid UTF-16 in timezone"))?; + + Ok(Cow::Owned(timezone_string.into_bytes())) + } + + // Private validation methods that enforce invariants + + fn validate_and_build_date_time( + &self, + record: IxdtfParseRecord<'a, Utf8>, + ) -> TemporalResult> { + let date_record = record.date.ok_or_else(|| { + TemporalError::range().with_message("Date component is required for DateTime parsing") + })?; + + let time_record = record.time.ok_or_else(|| { + TemporalError::range().with_message("Time component is required for DateTime parsing") + })?; + + let iso_date = self.validate_date_record(date_record)?; + let iso_time = self.validate_time_record(time_record)?; + + // Validate DateTime is within valid limits + let iso_datetime = IsoDateTime::new(iso_date, iso_time)?; + + Ok(ParsedDateTime { + iso: iso_datetime, + calendar: 
record.calendar.map(Cow::Borrowed), + offset: record.offset, + }) + } + + fn validate_and_build_date_time_utf16( + &self, + record: IxdtfParseRecord<'a, Utf16>, + ) -> TemporalResult> { + let date_record = record.date.ok_or_else(|| { + TemporalError::range().with_message("Date component is required for DateTime parsing") + })?; + + let time_record = record.time.ok_or_else(|| { + TemporalError::range().with_message("Time component is required for DateTime parsing") + })?; + + let iso_date = self.validate_date_record(date_record)?; + let iso_time = self.validate_time_record(time_record)?; + + // Validate DateTime is within valid limits + let iso_datetime = IsoDateTime::new(iso_date, iso_time)?; + + // Convert UTF-16 calendar to Cow if present + let calendar_cow = if let Some(calendar_utf16) = record.calendar { + Some(Self::convert_utf16_calendar_to_cow(calendar_utf16)?) + } else { + None + }; + + Ok(ParsedDateTime { + iso: iso_datetime, + calendar: calendar_cow, + offset: record.offset, + }) + } + + fn validate_and_build_zoned_date_time( + &self, + record: IxdtfParseRecord<'a, Utf8>, + ) -> TemporalResult> { + let date_record = record.date.ok_or_else(|| { + TemporalError::range() + .with_message("Date component is required for ZonedDateTime parsing") + })?; + + let time_record = record.time.ok_or_else(|| { + TemporalError::range() + .with_message("Time component is required for ZonedDateTime parsing") + })?; + + let iso_date = self.validate_date_record(date_record)?; + let iso_time = self.validate_time_record(time_record)?; + + let iso_datetime = IsoDateTime::new(iso_date, iso_time)?; + + let timezone_record = record.tz.ok_or_else(|| { + TemporalError::range() + .with_message("Timezone component is required for ZonedDateTime parsing") + })?; + + let timezone_bytes = match timezone_record.tz { + TimeZoneRecord::Name(name_bytes) => name_bytes, + TimeZoneRecord::Offset(_) => { + return Err( + TemporalError::range().with_message("Expected timezone name but found offset") 
+ ); + } + _ => { + return Err(TemporalError::range().with_message("Unsupported timezone record type")); + } + }; + + Ok(ParsedZonedDateTime { + iso: iso_datetime, + calendar: record.calendar.map(Cow::Borrowed), + offset: record.offset, + timezone: Cow::Borrowed(timezone_bytes), + }) + } + + fn validate_and_build_zoned_date_time_utf16( + &self, + record: IxdtfParseRecord<'a, Utf16>, + ) -> TemporalResult> { + let date_record = record.date.ok_or_else(|| { + TemporalError::range() + .with_message("Date component is required for ZonedDateTime parsing") + })?; + + let time_record = record.time.ok_or_else(|| { + TemporalError::range() + .with_message("Time component is required for ZonedDateTime parsing") + })?; + + let timezone_record = record.tz.ok_or_else(|| { + TemporalError::range() + .with_message("TimeZone annotation is required for ZonedDateTime parsing") + })?; + + let iso_date = self.validate_date_record(date_record)?; + let iso_time = self.validate_time_record(time_record)?; + + let iso_datetime = IsoDateTime::new(iso_date, iso_time)?; + + let timezone_cow = match timezone_record.tz { + TimeZoneRecord::Name(name_utf16) => Self::convert_utf16_timezone_to_cow(name_utf16)?, + TimeZoneRecord::Offset(_) => { + return Err( + TemporalError::range().with_message("Expected timezone name but found offset") + ); + } + _ => { + return Err(TemporalError::range().with_message("Unsupported timezone record type")); + } + }; + + let calendar_cow = if let Some(calendar_utf16) = record.calendar { + Some(Self::convert_utf16_calendar_to_cow(calendar_utf16)?) 
+ } else { + None + }; + + Ok(ParsedZonedDateTime { + iso: iso_datetime, + calendar: calendar_cow, + offset: record.offset, + timezone: timezone_cow, + }) + } + + fn validate_and_build_instant( + &self, + record: IxdtfParseInstantRecord, + ) -> TemporalResult { + let iso_date = self.validate_date_record(record.date)?; + let iso_time = self.validate_time_record(record.time)?; + + let iso_datetime = IsoDateTime::new(iso_date, iso_time)?; + + Ok(ParsedInstant { + iso: iso_datetime, + offset: record.offset, + }) + } + + fn validate_and_build_time(&self, record: TimeRecord) -> TemporalResult { + let iso_time = self.validate_time_record(record)?; + + Ok(ParsedTime { iso: iso_time }) + } + + fn validate_and_build_year_month( + &self, + record: IxdtfParseRecord<'a, Utf8>, + ) -> TemporalResult> { + let date_record = record.date.ok_or_else(|| { + TemporalError::range().with_message("Date component is required for YearMonth parsing") + })?; + + let iso_date = self.validate_date_record(date_record)?; + + Ok(ParsedYearMonth { + iso: iso_date, + calendar: record.calendar.map(Cow::Borrowed), + }) + } + + fn validate_and_build_year_month_utf16( + &self, + record: IxdtfParseRecord<'a, Utf16>, + ) -> TemporalResult> { + let date_record = record.date.ok_or_else(|| { + TemporalError::range().with_message("Date component is required for YearMonth parsing") + })?; + + let iso_date = self.validate_date_record(date_record)?; + + let calendar_cow = if let Some(calendar_utf16) = record.calendar { + Some(Self::convert_utf16_calendar_to_cow(calendar_utf16)?) 
+ } else { + None + }; + + Ok(ParsedYearMonth { + iso: iso_date, + calendar: calendar_cow, + }) + } + + fn validate_and_build_month_day( + &self, + record: IxdtfParseRecord<'a, Utf8>, + ) -> TemporalResult> { + let date_record = record.date.ok_or_else(|| { + TemporalError::range().with_message("Date component is required for MonthDay parsing") + })?; + + let iso_date = self.validate_date_record(date_record)?; + + Ok(ParsedMonthDay { + iso: iso_date, + calendar: record.calendar.map(Cow::Borrowed), + }) + } + + fn validate_and_build_month_day_utf16( + &self, + record: IxdtfParseRecord<'a, Utf16>, + ) -> TemporalResult> { + let date_record = record.date.ok_or_else(|| { + TemporalError::range().with_message("Date component is required for MonthDay parsing") + })?; + + let iso_date = self.validate_date_record(date_record)?; + + let calendar_cow = if let Some(calendar_utf16) = record.calendar { + Some(Self::convert_utf16_calendar_to_cow(calendar_utf16)?) + } else { + None + }; + + Ok(ParsedMonthDay { + iso: iso_date, + calendar: calendar_cow, + }) + } + + /// Validates a date record using the shared validation logic + fn validate_date_record(&self, record: DateRecord) -> TemporalResult { + validate_date_record_impl(record).map_err(|e| e.into_temporal_error()) + } + + /// Validates a time record using the shared validation logic + fn validate_time_record(&self, record: TimeRecord) -> TemporalResult { + validate_time_record_impl(record).map_err(|e| e.into_temporal_error()) + } +} + +/// Parsed result for PlainDateTime operations +#[derive(Debug, Clone)] +pub struct ParsedDateTime<'a> { + /// The validated ISO DateTime components + pub iso: IsoDateTime, + /// Optional calendar identifier as bytes (borrowed for UTF-8, owned for UTF-16) + pub calendar: Option>, + /// Optional UTC offset information + pub offset: Option, +} + +impl<'a> ParsedDateTime<'a> { + /// Get the calendar identifier as a string slice, defaulting to "iso8601" + pub fn calendar(&self) -> &str { + 
self.calendar + .as_ref() + .and_then(|c| core::str::from_utf8(c.as_ref()).ok()) + .unwrap_or("iso8601") + } +} + +/// Parsed result for ZonedDateTime operations +#[derive(Debug, Clone)] +pub struct ParsedZonedDateTime<'a> { + /// The validated ISO DateTime components + pub iso: IsoDateTime, + /// Optional calendar identifier as bytes (borrowed for UTF-8, owned for UTF-16) + pub calendar: Option>, + /// Optional UTC offset information + pub offset: Option, + /// Time zone identifier as bytes (borrowed for UTF-8, owned for UTF-16) + pub timezone: Cow<'a, [u8]>, +} + +impl<'a> ParsedZonedDateTime<'a> { + /// Get the calendar identifier as a string slice, defaulting to "iso8601" + pub fn calendar(&self) -> &str { + self.calendar + .as_ref() + .and_then(|c| core::str::from_utf8(c.as_ref()).ok()) + .unwrap_or("iso8601") + } + + /// Get the timezone identifier as a string slice + pub fn timezone(&self) -> &str { + core::str::from_utf8(&self.timezone).unwrap_or("UTC") + } +} + +/// Parsed result for Instant operations +#[derive(Debug, Clone)] +pub struct ParsedInstant { + /// The validated ISO DateTime components + pub iso: IsoDateTime, + /// UTC offset information (required for instants) + pub offset: UtcOffsetRecordOrZ, +} + +/// Parsed result for PlainTime operations +#[derive(Debug, Clone)] +pub struct ParsedTime { + /// The validated ISO Time components + pub iso: IsoTime, +} + +/// Parsed result for PlainYearMonth operations +#[derive(Debug, Clone)] +pub struct ParsedYearMonth<'a> { + /// The validated ISO Date components + pub iso: IsoDate, + /// Optional calendar identifier as bytes (borrowed for UTF-8, owned for UTF-16) + pub calendar: Option>, +} + +impl<'a> ParsedYearMonth<'a> { + /// Get the calendar identifier as a string slice, defaulting to "iso8601" + pub fn calendar(&self) -> &str { + self.calendar + .as_ref() + .and_then(|c| core::str::from_utf8(c.as_ref()).ok()) + .unwrap_or("iso8601") + } +} + +/// Parsed result for PlainMonthDay operations 
+#[derive(Debug, Clone)] +pub struct ParsedMonthDay<'a> { + /// The validated ISO Date components + pub iso: IsoDate, + /// Optional calendar identifier as bytes (borrowed for UTF-8, owned for UTF-16) + pub calendar: Option>, +} + +impl<'a> ParsedMonthDay<'a> { + /// Get the calendar identifier as a string slice, defaulting to "iso8601" + pub fn calendar(&self) -> &str { + self.calendar + .as_ref() + .and_then(|c| core::str::from_utf8(c.as_ref()).ok()) + .unwrap_or("iso8601") + } +} + // TODO: Move `Writeable` functionality to `ixdtf` crate #[derive(Debug, Default)] @@ -82,7 +844,7 @@ impl<'a> IxdtfStringBuilder<'a> { self } - pub fn with_calendar(mut self, calendar: &'static str, show: DisplayCalendar) -> Self { + pub fn with_calendar(mut self, calendar: &'a str, show: DisplayCalendar) -> Self { self.inner.calendar = Some(FormattableCalendar { show, calendar }); self } @@ -694,7 +1456,7 @@ fn parse_ixdtf(source: &[u8], variant: ParseVariant) -> TemporalResult parser.parse_with_annotation_handler(handler), ParseVariant::Time => parser.parse_time_with_annotation_handler(handler), } - .map_err(|e| TemporalError::range().with_message(format!("{e}")))?; + .map_err(|e| map_parse_error(e).into_temporal_error())?; if critical_duplicate_calendar { // TODO: Add tests for the below. 
@@ -725,6 +1487,14 @@ pub(crate) fn parse_date_time(source: &[u8]) -> TemporalResult TemporalResult TemporalResult> { let record = parse_ixdtf(source.as_bytes(), ParseVariant::DateTime)?; - // TODO: Support rejecting subminute precision in time zone annootations + // TODO: Support rejecting subminute precision in time zone annotations if record.tz.is_none() { return Err(TemporalError::range() .with_message("Time zone annotation is required for parsing a zoned date time.")); } + // Only validate ECMAScript Temporal specific requirements + if let Some(date_record) = record.date { + validate_date_record_impl(date_record).map_err(|e| e.into_temporal_error())?; + } + if let Some(time_record) = record.time { + validate_time_record_impl(time_record).map_err(|e| e.into_temporal_error())?; + } + Ok(record) } @@ -764,6 +1542,10 @@ pub(crate) fn parse_instant(source: &[u8]) -> TemporalResult Option<&[u8]> { #[cfg(test)] mod tests { - use super::{FormattableDate, FormattableOffset}; + use super::{FormattableDate, FormattableOffset, TemporalParser}; use crate::parsers::{FormattableTime, Precision}; - use alloc::format; + use alloc::{format, string::String}; use writeable::assert_writeable_eq; #[test] @@ -984,4 +1766,474 @@ mod tests { let date = FormattableDate(-10_000, 12, 8); assert_writeable_eq!(date, "-010000-12-08"); } + + #[test] + fn temporal_parser_date_time() { + let parser = TemporalParser::from_str_as_utf8("2025-01-15T14:30:00"); + + let result = parser.parse_date_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.date.year, 2025); + assert_eq!(parsed.iso.date.month, 1); + assert_eq!(parsed.iso.date.day, 15); + assert_eq!(parsed.iso.time.hour, 14); + assert_eq!(parsed.iso.time.minute, 30); + assert_eq!(parsed.iso.time.second, 0); + + let parser = TemporalParser::from_str_as_utf8("2025-01-15T14:30:00[u-ca=gregory]"); + let result = parser.parse_date_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + 
assert!(parsed.calendar.is_some()); + assert_eq!(&*parsed.calendar.unwrap(), b"gregory"); + + let parser = TemporalParser::from_str_as_utf8("999999-01-15T14:30:00"); + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let parser = TemporalParser::from_str_as_utf8("2025-13-15T14:30:00"); + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let parser = TemporalParser::from_str_as_utf8("2025-02-30T14:30:00"); + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let parser = TemporalParser::from_str_as_utf8("2025-01-15T25:30:00"); + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let parser = TemporalParser::from_str_as_utf8("2025-01-15T14:60:00"); + let result = parser.parse_date_time(); + assert!(result.is_err()); + } + + #[test] + fn temporal_parser_instant() { + let parser = TemporalParser::from_str_as_utf8("2025-01-15T14:30:00Z"); + + let result = parser.parse_instant(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.date.year, 2025); + + let parser = TemporalParser::from_str_as_utf8("2025-01-15T14:30:00+05:30"); + let result = parser.parse_instant(); + assert!(result.is_ok()); + + let parser = TemporalParser::from_str_as_utf8("2025-01-15T14:30:00"); + let result = parser.parse_instant(); + assert!(result.is_err()); + } + + #[test] + fn temporal_parser_zoned_date_time() { + let parser = TemporalParser::from_str_as_utf8("2025-01-15T14:30:00Z[America/New_York]"); + + let result = parser.parse_zoned_date_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.date.year, 2025); + assert_eq!(&*parsed.timezone, b"America/New_York"); + + // Test without timezone annotation (should fail) + let parser = TemporalParser::from_str_as_utf8("2025-01-15T14:30:00Z"); + let result = parser.parse_zoned_date_time(); + assert!(result.is_err()); + } + + #[test] + fn temporal_parser_time() { + let parser = 
TemporalParser::from_str_as_utf8("14:30:00"); + + let result = parser.parse_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.hour, 14); + assert_eq!(parsed.iso.minute, 30); + assert_eq!(parsed.iso.second, 0); + + let parser = TemporalParser::from_str_as_utf8("14:30:00.123456789"); + let result = parser.parse_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.millisecond, 123); + assert_eq!(parsed.iso.microsecond, 456); + assert_eq!(parsed.iso.nanosecond, 789); + } + + #[test] + fn temporal_parser_year_month() { + let parser = TemporalParser::from_str_as_utf8("2025-01"); + + let result = parser.parse_year_month(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.year, 2025); + assert_eq!(parsed.iso.month, 1); + + let parser = TemporalParser::from_str_as_utf8("2025-01[u-ca=hebrew]"); + let result = parser.parse_year_month(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert!(parsed.calendar.is_some()); + assert_eq!(&*parsed.calendar.unwrap(), b"hebrew"); + } + + #[test] + fn temporal_parser_month_day() { + let parser = TemporalParser::from_str_as_utf8("01-15"); + + let result = parser.parse_month_day(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.month, 1); + assert_eq!(parsed.iso.day, 15); + + let parser = TemporalParser::from_str_as_utf8("02-29"); + let result = parser.parse_month_day(); + assert!(result.is_ok()); // Should be OK as it could be valid in a leap year + } + + #[test] + fn temporal_parser_invariant_validation() { + let parser = TemporalParser::from_str_as_utf8("-271822-01-01T00:00:00"); + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let parser = TemporalParser::from_str_as_utf8("275761-01-01T00:00:00"); + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let parser = TemporalParser::from_str_as_utf8("2025-01-01T12:00:00"); + let result 
= parser.parse_date_time(); + assert!(result.is_ok()); + + let parser = TemporalParser::from_str_as_utf8("1970-01-01T12:00:00"); + let result = parser.parse_date_time(); + assert!(result.is_ok()); + + let parser = TemporalParser::from_str_as_utf8("2025-04-31T00:00:00"); // April has only 30 days + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let parser = TemporalParser::from_str_as_utf8("2025-02-29T00:00:00"); // 2025 is not a leap year + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let parser = TemporalParser::from_str_as_utf8("2024-02-29T00:00:00"); // 2024 is a leap year + let result = parser.parse_date_time(); + assert!(result.is_ok()); + } + + #[test] + fn temporal_parser_cow_strings() { + let parser = TemporalParser::from_str_as_utf8("2025-01-15T14:30:00"); + let result = parser.parse_date_time(); + assert!(result.is_ok()); + + let owned = String::from("2025-01-15T14:30:00"); + let parser = TemporalParser::from_str_as_utf8(&owned); + let result = parser.parse_date_time(); + assert!(result.is_ok()); + + let owned = String::from("2025-01-15T14:30:00"); + let parser = TemporalParser::from_str_as_utf8(&owned); + let result = parser.parse_date_time(); + assert!(result.is_ok()); + + use alloc::borrow::Cow; + let cow_borrowed: Cow = Cow::Borrowed("2025-01-15T14:30:00"); + let parser = TemporalParser::from_str_as_utf8(&cow_borrowed); + let result = parser.parse_date_time(); + assert!(result.is_ok()); + + let cow_owned: Cow = Cow::Owned(String::from("2025-01-15T14:30:00")); + let parser = TemporalParser::from_str_as_utf8(&cow_owned); + let result = parser.parse_date_time(); + assert!(result.is_ok()); + } + + #[test] + fn temporal_parser_better_error_messages() { + let parser = TemporalParser::from_str_as_utf8("999999-01-15T14:30:00"); + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let parser = TemporalParser::from_str_as_utf8("2025-04-31T14:30:00"); // April only has 30 days + let result = 
parser.parse_date_time(); + assert!(result.is_err()); + + use super::validate_date_record_impl; + use ixdtf::records::DateRecord; + + let invalid_day_record = DateRecord { + year: 2025, + month: 4, + day: 31, + }; + // This should pass because validate_date_record_impl only checks year limits now + // Day validation is handled by ixdtf during parsing + let result = validate_date_record_impl(invalid_day_record); + assert!(result.is_ok()); + + let invalid_year_record = DateRecord { + year: 275761, // Beyond valid range + month: 1, + day: 1, + }; + let result = validate_date_record_impl(invalid_year_record); + assert!(result.is_err()); + let error = result.unwrap_err().into_temporal_error(); + let error_msg = format!("{error}"); + assert!(error_msg.contains("275761")); + assert!(error_msg.contains("outside valid range")); + } + + #[test] + fn temporal_parser_utf16_date_time() { + use alloc::vec::Vec; + + let datetime_str = "2023-12-25T15:30:45.678"; + let datetime_utf16: Vec = datetime_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&datetime_utf16); + + let result = parser.parse_date_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.date.year, 2023); + assert_eq!(parsed.iso.date.month, 12); + assert_eq!(parsed.iso.date.day, 25); + assert_eq!(parsed.iso.time.hour, 15); + assert_eq!(parsed.iso.time.minute, 30); + assert_eq!(parsed.iso.time.second, 45); + assert_eq!(parsed.iso.time.millisecond, 678); + + // Calendar is None when no calendar annotation is present + assert!(parsed.calendar.is_none()); + } + + #[test] + fn temporal_parser_utf16_instant() { + use alloc::vec::Vec; + + let instant_str = "2023-12-25T15:30:45.678Z"; + let instant_utf16: Vec = instant_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&instant_utf16); + + let result = parser.parse_instant(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.date.year, 2023); + 
assert_eq!(parsed.iso.date.month, 12); + assert_eq!(parsed.iso.date.day, 25); + assert_eq!(parsed.iso.time.hour, 15); + assert_eq!(parsed.iso.time.minute, 30); + assert_eq!(parsed.iso.time.second, 45); + assert_eq!(parsed.iso.time.millisecond, 678); + } + + #[test] + fn temporal_parser_utf16_time() { + use alloc::vec::Vec; + + let time_str = "15:30:45.678"; + let time_utf16: Vec = time_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&time_utf16); + + let result = parser.parse_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.hour, 15); + assert_eq!(parsed.iso.minute, 30); + assert_eq!(parsed.iso.second, 45); + assert_eq!(parsed.iso.millisecond, 678); + } + + #[test] + fn temporal_parser_utf16_year_month() { + use alloc::vec::Vec; + + let ym_str = "2023-12"; + let ym_utf16: Vec = ym_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&ym_utf16); + + let result = parser.parse_year_month(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.year, 2023); + assert_eq!(parsed.iso.month, 12); + + // Calendar is None when no calendar annotation is present + assert!(parsed.calendar.is_none()); + } + + #[test] + fn temporal_parser_utf16_month_day() { + use alloc::vec::Vec; + + let md_str = "12-25"; + let md_utf16: Vec = md_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&md_utf16); + + let result = parser.parse_month_day(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.month, 12); + assert_eq!(parsed.iso.day, 25); + + // Calendar is None when no calendar annotation is present + assert!(parsed.calendar.is_none()); + } + + #[test] + fn temporal_parser_utf16_zoned_date_time() { + use alloc::vec::Vec; + + let zdt_str = "2023-12-25T15:30:45.678Z[America/New_York]"; + let zdt_utf16: Vec = zdt_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&zdt_utf16); + + let result = 
parser.parse_zoned_date_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert_eq!(parsed.iso.date.year, 2023); + assert_eq!(parsed.iso.date.month, 12); + assert_eq!(parsed.iso.date.day, 25); + assert_eq!(parsed.iso.time.hour, 15); + assert_eq!(parsed.iso.time.minute, 30); + assert_eq!(parsed.iso.time.second, 45); + assert_eq!(parsed.iso.time.millisecond, 678); + + assert_eq!(parsed.timezone(), "America/New_York"); + + // Calendar is None when no calendar annotation is present + assert!(parsed.calendar.is_none()); + } + + #[test] + fn temporal_parser_utf16_vs_utf8_comparison() { + use alloc::vec::Vec; + + let datetime_str = "2023-06-15T10:20:30.456"; + let datetime_utf16: Vec = datetime_str.encode_utf16().collect(); + + let parser_utf8 = TemporalParser::from_str_as_utf8(datetime_str); + let parser_utf16 = TemporalParser::from_utf16(&datetime_utf16); + + let result_utf8 = parser_utf8.parse_date_time().unwrap(); + let result_utf16 = parser_utf16.parse_date_time().unwrap(); + + // Compare ISO components (should be identical) + assert_eq!(result_utf8.iso.date.year, result_utf16.iso.date.year); + assert_eq!(result_utf8.iso.date.month, result_utf16.iso.date.month); + assert_eq!(result_utf8.iso.date.day, result_utf16.iso.date.day); + assert_eq!(result_utf8.iso.time.hour, result_utf16.iso.time.hour); + assert_eq!(result_utf8.iso.time.minute, result_utf16.iso.time.minute); + assert_eq!(result_utf8.iso.time.second, result_utf16.iso.time.second); + assert_eq!( + result_utf8.iso.time.millisecond, + result_utf16.iso.time.millisecond + ); + assert_eq!( + result_utf8.iso.time.microsecond, + result_utf16.iso.time.microsecond + ); + assert_eq!( + result_utf8.iso.time.nanosecond, + result_utf16.iso.time.nanosecond + ); + } + + #[test] + fn temporal_parser_utf16_error_handling() { + use alloc::vec::Vec; + + let invalid_str = "2023-02-30T15:30:45"; // February 30th doesn't exist + let invalid_utf16: Vec = invalid_str.encode_utf16().collect(); + let parser = 
TemporalParser::from_utf16(&invalid_utf16); + + let result = parser.parse_date_time(); + assert!(result.is_err()); + + let invalid_year_str = "999999-01-01T00:00:00"; + let invalid_year_utf16: Vec = invalid_year_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&invalid_year_utf16); + + let result = parser.parse_date_time(); + assert!(result.is_err()); + } + + #[test] + fn temporal_parser_utf16_calendar_support() { + use alloc::vec::Vec; + + let datetime_str = "2023-12-25T15:30:45[u-ca=gregory]"; + let datetime_utf16: Vec = datetime_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&datetime_utf16); + + let result = parser.parse_date_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + + assert!(parsed.calendar.is_some()); + assert_eq!(&*parsed.calendar.unwrap(), b"gregory"); + + let iso_str = "2023-12-25T15:30:45[u-ca=iso8601]"; + let iso_utf16: Vec = iso_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&iso_utf16); + + let result = parser.parse_date_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + assert!(parsed.calendar.is_some()); + assert_eq!(&*parsed.calendar.unwrap(), b"iso8601"); + + let custom_str = "2023-12-25T15:30:45[u-ca=my-custom-calendar]"; + let custom_utf16: Vec = custom_str.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&custom_utf16); + + let result = parser.parse_date_time(); + assert!(result.is_ok()); + let parsed = result.unwrap(); + + assert!(parsed.calendar.is_some()); + assert_eq!(&*parsed.calendar.unwrap(), b"my-custom-calendar"); + } + + #[test] + fn temporal_parser_utf16_timezone_names() { + use alloc::vec::Vec; + + let timezones = [ + ("2023-12-25T15:30:45Z[UTC]", "UTC"), + ("2023-12-25T15:30:45Z[America/New_York]", "America/New_York"), + ("2023-12-25T15:30:45Z[Europe/London]", "Europe/London"), + ("2023-12-25T15:30:45Z[Asia/Tokyo]", "Asia/Tokyo"), + ("2023-12-25T15:30:45Z[Australia/Sydney]", "Australia/Sydney"), + 
( + "2023-12-25T15:30:45Z[America/Los_Angeles]", + "America/Los_Angeles", + ), + ("2023-12-25T15:30:45Z[Europe/Berlin]", "Europe/Berlin"), + ]; + + for (input, expected_tz) in timezones.iter() { + let input_utf16: Vec = input.encode_utf16().collect(); + let parser = TemporalParser::from_utf16(&input_utf16); + + let result = parser.parse_zoned_date_time(); + assert!(result.is_ok(), "Failed to parse: {input}"); + + let parsed = result.unwrap(); + assert_eq!( + parsed.timezone(), + *expected_tz, + "Timezone mismatch for: {input}" + ); + + assert_eq!(&*parsed.timezone, expected_tz.as_bytes()); + } + } }