diff --git a/README.md b/README.md index 36bdaa0..bd58ff4 100644 --- a/README.md +++ b/README.md @@ -92,15 +92,15 @@ C++ library, here are few brief notes: ## Benchmarks -Below is the table of average timings in nanoseconds for parsing a single number +Below is the table of best timings in nanoseconds for parsing a single number into a 64-bit float. | | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` | | ---------------- | -------- | -------- | --------- | ------ | ------ | ------- | -| fast-float | 22.08 | 11.10 | 20.04 | 40.77 | 26.33 | 29.84 | -| lexical | 61.63 | 25.10 | 53.77 | 72.33 | 53.39 | 72.40 | -| lexical/lossy | 61.51 | 25.24 | 54.00 | 71.30 | 52.87 | 71.71 | -| from_str | 175.07 | 22.58 | 103.00 | 228.78 | 115.76 | 211.13 | +| fast-float | 21.58 | 10.70 | 19.36 | 40.50 | 26.07 | 29.13 | +| lexical | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 | +| lexical/lossy | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 | +| from_str | 174.43 | 22.30 | 99.93 | 227.76 | 111.31 | 204.46 | | fast_float (C++) | 22.78 | 10.99 | 20.05 | 41.12 | 27.51 | 30.85 | | abseil (C++) | 42.66 | 32.88 | 46.01 | 50.83 | 46.33 | 49.95 | | netlib (C++) | 57.53 | 24.86 | 64.72 | 56.63 | 36.20 | 67.29 | @@ -109,8 +109,7 @@ into a 64-bit float. Parsers: - `fast-float` - this very crate -- `lexical` – from `lexical_core` crate, v0.7 -- `lexical/lossy` - from `lexical_core` crate, v0.7 (lossy parser) +- `lexical` – `lexical_core`, v0.7 (non-lossy; same performance as lossy) - `from_str` – Rust standard library, `FromStr` trait - `fast_float (C++)` – original C++ implementation of 'fast-float' method - `abseil (C++)` – Abseil C++ Common Libraries diff --git a/extras/simple-bench/src/main.rs b/extras/simple-bench/src/main.rs index 23bd621..9428fad 100644 --- a/extras/simple-bench/src/main.rs +++ b/extras/simple-bench/src/main.rs @@ -7,7 +7,7 @@ use std::str::FromStr; use std::time::Instant; use fastrand::Rng; -use lexical::{FromLexical, FromLexicalLossy}; +use lexical::FromLexical; use structopt::StructOpt; use fast_float::FastFloat; @@ -109,7 +109,6 @@ fn run_bench T>( enum Method { FastFloat, Lexical, - LexicalLossy, FromStr, } @@ -126,12 +125,11 @@ impl Method { match self { Self::FastFloat => "fast-float", Self::Lexical => "lexical", - Self::LexicalLossy => "lexical/lossy", Self::FromStr => "from_str", } } - fn run_as( + fn run_as( &self, input: &Input, repeat: usize, @@ -147,11 +145,6 @@ impl Method { .unwrap_or_default() .0 }), - Self::LexicalLossy => run_bench(data, repeat, |s: &str| { - lexical_core::parse_partial_lossy::(s.as_bytes()) - .unwrap_or_default() - .0 - }), Self::FromStr => run_bench(data, repeat, |s: &str| s.parse::().unwrap_or_default()), }; @@ -172,12 +165,7 @@ impl Method { } pub fn all() -> &'static [Self] { - &[ - Method::FastFloat, - Method::Lexical, - Method::LexicalLossy, - Method::FromStr, - ] + &[Method::FastFloat, Method::Lexical, Method::FromStr] } } diff --git a/src/decimal.rs b/src/decimal.rs index 43112c9..c36d6d3 100644 --- a/src/decimal.rs +++ b/src/decimal.rs @@ -190,6 +190,7 @@ impl Decimal { pub fn parse_decimal(mut s: &[u8]) -> Decimal { // can't fail since it follows a call to parse_number let mut d = Decimal::default(); + let start = s; let c = s.get_first(); d.negative = c == b'-'; if c == b'-' || c == b'+' { @@ -217,6 +218,24 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal { parse_digits(&mut s, |digit| d.try_add_digit(digit)); d.decimal_point = s.len() as i32 - first.len() as i32; } + if d.num_digits != 0 { + // Ignore the trailing zeros if there are any + let mut n_trailing_zeros = 0; + for &c in start[..(start.len() - s.len())].iter().rev() { + if c == b'0' { + n_trailing_zeros += 1; + } else if c != b'.' { + break; + } + } + d.decimal_point += n_trailing_zeros as i32; + d.num_digits -= n_trailing_zeros; + d.decimal_point += d.num_digits as i32; + if d.num_digits > Decimal::MAX_DIGITS { + d.truncated = true; + d.num_digits = Decimal::MAX_DIGITS; + } + } if s.check_first2(b'e', b'E') { s = s.advance(1); let mut neg_exp = false; @@ -234,11 +253,6 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal { }); d.decimal_point += if neg_exp { -exp_num } else { exp_num }; } - d.decimal_point += d.num_digits as i32; - if d.num_digits > Decimal::MAX_DIGITS { - d.truncated = true; - d.num_digits = Decimal::MAX_DIGITS; - } for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW { d.digits[i] = 0; } diff --git a/src/number.rs b/src/number.rs index 84c0b1e..ecb76c2 100644 --- a/src/number.rs +++ b/src/number.rs @@ -68,9 +68,8 @@ fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) { } #[inline] -fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) -> usize { +fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) { // may cause overflows, to be handled later - let mut count = 0; if cfg!(target_endian = "little") { if let Some(v) = s.try_read_u64() { if is_8digits_le(v) { @@ -78,20 +77,17 @@ fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) -> usize { .wrapping_mul(1_0000_0000) .wrapping_add(parse_8digits_le(v)); s.step_by(8); - count = 8; if let Some(v) = s.try_read_u64() { if is_8digits_le(v) { *x = x .wrapping_mul(1_0000_0000) .wrapping_add(parse_8digits_le(v)); s.step_by(8); - count = 16; } } } } } - count } #[inline] @@ -124,18 +120,22 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> i64 { #[inline] pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { - // assuming s.len() >= 1 + debug_assert!(!s.is_empty()); + let mut s = AsciiStr::new(s); let start = s; // handle optional +/- sign let mut negative = false; - if s.first_either(b'-', b'+') { - negative = s.first_is(b'-'); + if s.first() == b'-' { + negative = true; if s.step().is_empty() { return None; } + } else if s.first() == b'+' && s.step().is_empty() { + return None; } + debug_assert!(!s.is_empty()); // parse initial digits before dot let mut mantissa = 0_u64; @@ -171,8 +171,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { let len = s.offset_from(&start) as _; // handle uncommon case with many digits - n_digits -= 19; - if n_digits <= 0 { + if n_digits <= 19 { return Some(( Number { exponent, @@ -184,6 +183,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { )); } + n_digits -= 19; let mut many_digits = false; let mut p = digits_start; while p.check_first_either(b'0', b'.') { diff --git a/tests/test_basic.rs b/tests/test_basic.rs index b444532..90d736b 100644 --- a/tests/test_basic.rs +++ b/tests/test_basic.rs @@ -21,13 +21,14 @@ macro_rules! check { check!($ty, $s, core::$ty::NEG_INFINITY) }}; ($ty:ident, $s:expr, $e:expr) => {{ - let s = $s.as_bytes(); + let string = String::from($s); + let s = string.as_bytes(); let expected: $ty = $e; let result = fast_float::parse::<$ty, _>(s).unwrap(); assert_eq!(result, expected); let lex = lexical_core::parse::<$ty>(s).unwrap(); assert_eq!(result, lex); - let std = <$ty>::from_str($s); + let std = <$ty>::from_str(string.as_str()); if let Ok(std) = std { // stdlib can't parse all weird floats if std.is_finite() && result.is_finite() { @@ -84,6 +85,14 @@ macro_rules! check_f64_neg_inf { }; } +fn append_zeros(s: impl AsRef, n: usize) -> String { + let mut s = String::from(s.as_ref()); + for _ in 0..n { + s.push('0'); + } + s +} + #[test] fn test_f64_inf() { check_f64_inf!("INF"); @@ -200,6 +209,17 @@ fn test_f64_long() { #[test] fn test_f64_general() { + check_f64!("9007199254740993.0", hexf64("0x1.p+53")); + check_f64!(append_zeros("9007199254740993.0", 1000), hexf64("0x1.p+53")); + check_f64!("10000000000000000000", hexf64("0x1.158e460913dp+63")); + check_f64!( + "10000000000000000000000000000001000000000000", + hexf64("0x1.cb2d6f618c879p+142") + ); + check_f64!( + "10000000000000000000000000000000000000000001", + hexf64("0x1.cb2d6f618c879p+142") + ); check_f64!(1.1920928955078125e-07); check_f64!("-0", -0.0); check_f64!( @@ -277,6 +297,23 @@ fn test_f32_inf() { #[test] fn test_f32_basic() { + let f1 = "\ + 1.175494140627517859246175898662808184331245864732796240031385942718174675986064\ + 7699724722770042717456817626953125"; + check_f32!(f1, hexf32("0x1.2ced3p+0")); + check_f32!(format!("{}e-38", f1), hexf32("0x1.fffff8p-127")); + check_f32!( + format!("{}e-38", append_zeros(f1, 655)), + hexf32("0x1.fffff8p-127") + ); + check_f32!( + format!("{}e-38", append_zeros(f1, 656)), + hexf32("0x1.fffff8p-127") + ); + check_f32!( + format!("{}e-38", append_zeros(f1, 1000)), + hexf32("0x1.fffff8p-127") + ); check_f32!(1.00000006e+09); check_f32!(1.4012984643e-45); check_f32!(1.1754942107e-38); diff --git a/tests/test_exhaustive.rs b/tests/test_exhaustive.rs index 5b51cc6..15c07a5 100644 --- a/tests/test_exhaustive.rs +++ b/tests/test_exhaustive.rs @@ -1,6 +1,6 @@ #[test] #[ignore] -fn test_f32_exhaustive() { +fn test_f32_exhaustive_ryu() { let mut buf = ryu::Buffer::new(); for i in 0..0xFFFF_FFFF_u32 { let a: f32 = unsafe { core::mem::transmute(i) }; @@ -9,3 +9,15 @@ fn test_f32_exhaustive() { assert!(a == b || (a.is_nan() && b.is_nan())); } } + +#[test] +#[ignore] +fn test_f32_exhaustive_lexical() { + let mut buf = [0; 1024]; + for i in 0..0xFFFF_FFFF_u32 { + let a: f32 = unsafe { core::mem::transmute(i) }; + let s = lexical_core::write(a, &mut buf); + let b: f32 = fast_float::parse(s).unwrap(); + assert!(a == b || (a.is_nan() && b.is_nan())); + } +}