From c78199b9b1ff8a07fce12ac5a0543ea27dd3bd17 Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <i.s.smirnov@gmail.com> Date: Mon, 11 Jan 2021 17:58:55 +0000 Subject: [PATCH 1/9] Remove redundant return value from a function --- src/number.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/number.rs b/src/number.rs index 84c0b1e..03ca708 100644 --- a/src/number.rs +++ b/src/number.rs @@ -68,9 +68,8 @@ fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) { } #[inline] -fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) -> usize { +fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) { // may cause overflows, to be handled later - let mut count = 0; if cfg!(target_endian = "little") { if let Some(v) = s.try_read_u64() { if is_8digits_le(v) { @@ -78,20 +77,17 @@ fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) -> usize { .wrapping_mul(1_0000_0000) .wrapping_add(parse_8digits_le(v)); s.step_by(8); - count = 8; if let Some(v) = s.try_read_u64() { if is_8digits_le(v) { *x = x .wrapping_mul(1_0000_0000) .wrapping_add(parse_8digits_le(v)); s.step_by(8); - count = 16; } } } } } - count } #[inline] From 36cb83364b85077fe08c17ab8abdecd8124164d5 Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <i.s.smirnov@gmail.com> Date: Mon, 11 Jan 2021 17:59:30 +0000 Subject: [PATCH 2/9] Minor speedup for +/- sign parsing --- src/number.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/number.rs b/src/number.rs index 03ca708..52efc61 100644 --- a/src/number.rs +++ b/src/number.rs @@ -126,11 +126,13 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { // handle optional +/- sign let mut negative = false; - if s.first_either(b'-', b'+') { - negative = s.first_is(b'-'); + if s.first() == b'-' { + negative = true; if s.step().is_empty() { return None; } + } else if s.first() == b'+' && s.step().is_empty() { + return None; } // parse initial digits before dot From 9cebea96c9e671ff9af5fe300ad2343a68ebb6aa Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <i.s.smirnov@gmail.com> Date: Mon, 11 Jan 2021 17:59:51 +0000 Subject: [PATCH 3/9] Add a few debug asserts re: non-empty string --- src/number.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/number.rs b/src/number.rs index 52efc61..bf02a3d 100644 --- a/src/number.rs +++ b/src/number.rs @@ -120,7 +120,8 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> i64 { #[inline] pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { - // assuming s.len() >= 1 + debug_assert!(!s.is_empty()); + let mut s = AsciiStr::new(s); let start = s; @@ -134,6 +135,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { } else if s.first() == b'+' && s.step().is_empty() { return None; } + debug_assert!(!s.is_empty()); // parse initial digits before dot let mut mantissa = 0_u64; From aa2e9f9b8d834ae25ca2cdae49b268655a2443ff Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <i.s.smirnov@gmail.com> Date: Mon, 11 Jan 2021 18:30:11 +0000 Subject: [PATCH 4/9] Another minor optimization in parse_number() --- src/number.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/number.rs b/src/number.rs index bf02a3d..ecb76c2 100644 --- a/src/number.rs +++ b/src/number.rs @@ -171,8 +171,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { let len = s.offset_from(&start) as _; // handle uncommon case with many digits - n_digits -= 19; - if n_digits <= 0 { + if n_digits <= 19 { return Some(( Number { exponent, @@ -184,6 +183,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> { )); } + n_digits -= 19; let mut many_digits = false; let mut p = digits_start; while p.check_first_either(b'0', b'.') { From 0bead7f3188fcaed6ac467339c157fcf40ae7ba8 Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <i.s.smirnov@gmail.com> Date: Mon, 11 Jan 2021 19:14:06 +0000 Subject: [PATCH 5/9] Add exhaustive f32 roundtrip via lexical_core --- tests/test_exhaustive.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/test_exhaustive.rs b/tests/test_exhaustive.rs index 5b51cc6..15c07a5 100644 --- a/tests/test_exhaustive.rs +++ b/tests/test_exhaustive.rs @@ -1,6 +1,6 @@ #[test] #[ignore] -fn test_f32_exhaustive() { +fn test_f32_exhaustive_ryu() { let mut buf = ryu::Buffer::new(); for i in 0..0xFFFF_FFFF_u32 { let a: f32 = unsafe { core::mem::transmute(i) }; @@ -9,3 +9,15 @@ fn test_f32_exhaustive() { assert!(a == b || (a.is_nan() && b.is_nan())); } } + +#[test] +#[ignore] +fn test_f32_exhaustive_lexical() { + let mut buf = [0; 1024]; + for i in 0..0xFFFF_FFFF_u32 { + let a: f32 = unsafe { core::mem::transmute(i) }; + let s = lexical_core::write(a, &mut buf); + let b: f32 = fast_float::parse(s).unwrap(); + assert!(a == b || (a.is_nan() && b.is_nan())); + } +} From 904bd06fb573e8a66ef431734a3b0f26832e3737 Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <i.s.smirnov@gmail.com> Date: Mon, 11 Jan 2021 22:41:13 +0000 Subject: [PATCH 6/9] Add a (now-failing) test for issue #4 (decimals) --- tests/test_basic.rs | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/tests/test_basic.rs b/tests/test_basic.rs index b444532..90d736b 100644 --- a/tests/test_basic.rs +++ b/tests/test_basic.rs @@ -21,13 +21,14 @@ macro_rules! check { check!($ty, $s, core::$ty::NEG_INFINITY) }}; ($ty:ident, $s:expr, $e:expr) => {{ - let s = $s.as_bytes(); + let string = String::from($s); + let s = string.as_bytes(); let expected: $ty = $e; let result = fast_float::parse::<$ty, _>(s).unwrap(); assert_eq!(result, expected); let lex = lexical_core::parse::<$ty>(s).unwrap(); assert_eq!(result, lex); - let std = <$ty>::from_str($s); + let std = <$ty>::from_str(string.as_str()); if let Ok(std) = std { // stdlib can't parse all weird floats if std.is_finite() && result.is_finite() { @@ -84,6 +85,14 @@ macro_rules! check_f64_neg_inf { }; } +fn append_zeros(s: impl AsRef<str>, n: usize) -> String { + let mut s = String::from(s.as_ref()); + for _ in 0..n { + s.push('0'); + } + s +} + #[test] fn test_f64_inf() { check_f64_inf!("INF"); @@ -200,6 +209,17 @@ fn test_f64_long() { #[test] fn test_f64_general() { + check_f64!("9007199254740993.0", hexf64("0x1.p+53")); + check_f64!(append_zeros("9007199254740993.0", 1000), hexf64("0x1.p+53")); + check_f64!("10000000000000000000", hexf64("0x1.158e460913dp+63")); + check_f64!( + "10000000000000000000000000000001000000000000", + hexf64("0x1.cb2d6f618c879p+142") + ); + check_f64!( + "10000000000000000000000000000000000000000001", + hexf64("0x1.cb2d6f618c879p+142") + ); check_f64!(1.1920928955078125e-07); check_f64!("-0", -0.0); check_f64!( @@ -277,6 +297,23 @@ fn test_f32_inf() { #[test] fn test_f32_basic() { + let f1 = "\ + 1.175494140627517859246175898662808184331245864732796240031385942718174675986064\ + 7699724722770042717456817626953125"; + check_f32!(f1, hexf32("0x1.2ced3p+0")); + check_f32!(format!("{}e-38", f1), hexf32("0x1.fffff8p-127")); + check_f32!( + format!("{}e-38", append_zeros(f1, 655)), + hexf32("0x1.fffff8p-127") + ); + check_f32!( + format!("{}e-38", append_zeros(f1, 656)), + hexf32("0x1.fffff8p-127") + ); + check_f32!( + format!("{}e-38", append_zeros(f1, 1000)), + hexf32("0x1.fffff8p-127") + ); check_f32!(1.00000006e+09); check_f32!(1.4012984643e-45); check_f32!(1.1754942107e-38); From 56ac048a96c10c014d5398bae9e548b929616228 Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <i.s.smirnov@gmail.com> Date: Mon, 11 Jan 2021 22:41:42 +0000 Subject: [PATCH 7/9] Ignore trailing 0s when parsing decimals (fix #4) --- src/decimal.rs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/decimal.rs b/src/decimal.rs index 43112c9..c36d6d3 100644 --- a/src/decimal.rs +++ b/src/decimal.rs @@ -190,6 +190,7 @@ impl Decimal { pub fn parse_decimal(mut s: &[u8]) -> Decimal { // can't fail since it follows a call to parse_number let mut d = Decimal::default(); + let start = s; let c = s.get_first(); d.negative = c == b'-'; if c == b'-' || c == b'+' { @@ -217,6 +218,24 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal { parse_digits(&mut s, |digit| d.try_add_digit(digit)); d.decimal_point = s.len() as i32 - first.len() as i32; } + if d.num_digits != 0 { + // Ignore the trailing zeros if there are any + let mut n_trailing_zeros = 0; + for &c in start[..(start.len() - s.len())].iter().rev() { + if c == b'0' { + n_trailing_zeros += 1; + } else if c != b'.' { + break; + } + } + d.decimal_point += n_trailing_zeros as i32; + d.num_digits -= n_trailing_zeros; + d.decimal_point += d.num_digits as i32; + if d.num_digits > Decimal::MAX_DIGITS { + d.truncated = true; + d.num_digits = Decimal::MAX_DIGITS; + } + } if s.check_first2(b'e', b'E') { s = s.advance(1); let mut neg_exp = false; @@ -234,11 +253,6 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal { }); d.decimal_point += if neg_exp { -exp_num } else { exp_num }; } - d.decimal_point += d.num_digits as i32; - if d.num_digits > Decimal::MAX_DIGITS { - d.truncated = true; - d.num_digits = Decimal::MAX_DIGITS; - } for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW { d.digits[i] = 0; } From 743df140c32be4caa130432911f74be1f761bf52 Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <i.s.smirnov@gmail.com> Date: Mon, 11 Jan 2021 22:52:45 +0000 Subject: [PATCH 8/9] Update the benchmarks table with recent results --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 36bdaa0..8ae3d35 100644 --- a/README.md +++ b/README.md @@ -92,15 +92,15 @@ C++ library, here are few brief notes: ## Benchmarks -Below is the table of average timings in nanoseconds for parsing a single number +Below is the table of best timings in nanoseconds for parsing a single number into a 64-bit float. | | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` | | ---------------- | -------- | -------- | --------- | ------ | ------ | ------- | -| fast-float | 22.08 | 11.10 | 20.04 | 40.77 | 26.33 | 29.84 | -| lexical | 61.63 | 25.10 | 53.77 | 72.33 | 53.39 | 72.40 | -| lexical/lossy | 61.51 | 25.24 | 54.00 | 71.30 | 52.87 | 71.71 | -| from_str | 175.07 | 22.58 | 103.00 | 228.78 | 115.76 | 211.13 | +| fast-float | 21.58 | 10.70 | 19.36 | 40.50 | 26.07 | 29.13 | +| lexical | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 | +| lexical/lossy | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 | +| from_str | 174.43 | 22.30 | 99.93 | 227.76 | 111.31 | 204.46 | | fast_float (C++) | 22.78 | 10.99 | 20.05 | 41.12 | 27.51 | 30.85 | | abseil (C++) | 42.66 | 32.88 | 46.01 | 50.83 | 46.33 | 49.95 | | netlib (C++) | 57.53 | 24.86 | 64.72 | 56.63 | 36.20 | 67.29 | From 68729215fc818a005c916d894e9284ea80477814 Mon Sep 17 00:00:00 2001 From: Ivan Smirnov <i.s.smirnov@gmail.com> Date: Mon, 11 Jan 2021 23:04:25 +0000 Subject: [PATCH 9/9] Remove lexical/lossy from benches (redundant) --- README.md | 3 +-- extras/simple-bench/src/main.rs | 18 +++--------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 8ae3d35..bd58ff4 100644 --- a/README.md +++ b/README.md @@ -109,8 +109,7 @@ into a 64-bit float. Parsers: - `fast-float` - this very crate -- `lexical` – from `lexical_core` crate, v0.7 -- `lexical/lossy` - from `lexical_core` crate, v0.7 (lossy parser) +- `lexical` – `lexical_core`, v0.7 (non-lossy; same performance as lossy) - `from_str` – Rust standard library, `FromStr` trait - `fast_float (C++)` – original C++ implementation of 'fast-float' method - `abseil (C++)` – Abseil C++ Common Libraries diff --git a/extras/simple-bench/src/main.rs b/extras/simple-bench/src/main.rs index 23bd621..9428fad 100644 --- a/extras/simple-bench/src/main.rs +++ b/extras/simple-bench/src/main.rs @@ -7,7 +7,7 @@ use std::str::FromStr; use std::time::Instant; use fastrand::Rng; -use lexical::{FromLexical, FromLexicalLossy}; +use lexical::FromLexical; use structopt::StructOpt; use fast_float::FastFloat; @@ -109,7 +109,6 @@ fn run_bench<T: FastFloat, F: Fn(&str) -> T>( enum Method { FastFloat, Lexical, - LexicalLossy, FromStr, } @@ -126,12 +125,11 @@ impl Method { match self { Self::FastFloat => "fast-float", Self::Lexical => "lexical", - Self::LexicalLossy => "lexical/lossy", Self::FromStr => "from_str", } } - fn run_as<T: FastFloat + FromLexical + FromLexicalLossy + FromStr>( + fn run_as<T: FastFloat + FromLexical + FromStr>( &self, input: &Input, repeat: usize, @@ -147,11 +145,6 @@ impl Method { .unwrap_or_default() .0 }), - Self::LexicalLossy => run_bench(data, repeat, |s: &str| { - lexical_core::parse_partial_lossy::<T>(s.as_bytes()) - .unwrap_or_default() - .0 - }), Self::FromStr => run_bench(data, repeat, |s: &str| s.parse::<T>().unwrap_or_default()), }; @@ -172,12 +165,7 @@ impl Method { } pub fn all() -> &'static [Self] { - &[ - Method::FastFloat, - Method::Lexical, - Method::LexicalLossy, - Method::FromStr, - ] + &[Method::FastFloat, Method::Lexical, Method::FromStr] } }