Skip to content

Fix a bug with trailing zeros when parsing decimals #11

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 11, 2021
Merged
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,15 @@ C++ library, here are few brief notes:

## Benchmarks

Below is the table of average timings in nanoseconds for parsing a single number
Below is the table of best timings in nanoseconds for parsing a single number
into a 64-bit float.

| | `canada` | `mesh` | `uniform` | `iidi` | `iei` | `rec32` |
| ---------------- | -------- | -------- | --------- | ------ | ------ | ------- |
| fast-float | 22.08 | 11.10 | 20.04 | 40.77 | 26.33 | 29.84 |
| lexical | 61.63 | 25.10 | 53.77 | 72.33 | 53.39 | 72.40 |
| lexical/lossy | 61.51 | 25.24 | 54.00 | 71.30 | 52.87 | 71.71 |
| from_str | 175.07 | 22.58 | 103.00 | 228.78 | 115.76 | 211.13 |
| fast-float | 21.58 | 10.70 | 19.36 | 40.50 | 26.07 | 29.13 |
| lexical | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 |
| lexical/lossy | 65.90 | 23.28 | 54.75 | 75.80 | 52.18 | 75.36 |
| from_str | 174.43 | 22.30 | 99.93 | 227.76 | 111.31 | 204.46 |
| fast_float (C++) | 22.78 | 10.99 | 20.05 | 41.12 | 27.51 | 30.85 |
| abseil (C++) | 42.66 | 32.88 | 46.01 | 50.83 | 46.33 | 49.95 |
| netlib (C++) | 57.53 | 24.86 | 64.72 | 56.63 | 36.20 | 67.29 |
Expand All @@ -109,8 +109,7 @@ into a 64-bit float.
Parsers:

- `fast-float` - this very crate
- `lexical` – from `lexical_core` crate, v0.7
- `lexical/lossy` - from `lexical_core` crate, v0.7 (lossy parser)
- `lexical` – `lexical_core`, v0.7 (non-lossy; same performance as lossy)
- `from_str` – Rust standard library, `FromStr` trait
- `fast_float (C++)` – original C++ implementation of 'fast-float' method
- `abseil (C++)` – Abseil C++ Common Libraries
Expand Down
18 changes: 3 additions & 15 deletions extras/simple-bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::str::FromStr;
use std::time::Instant;

use fastrand::Rng;
use lexical::{FromLexical, FromLexicalLossy};
use lexical::FromLexical;
use structopt::StructOpt;

use fast_float::FastFloat;
Expand Down Expand Up @@ -109,7 +109,6 @@ fn run_bench<T: FastFloat, F: Fn(&str) -> T>(
enum Method {
FastFloat,
Lexical,
LexicalLossy,
FromStr,
}

Expand All @@ -126,12 +125,11 @@ impl Method {
match self {
Self::FastFloat => "fast-float",
Self::Lexical => "lexical",
Self::LexicalLossy => "lexical/lossy",
Self::FromStr => "from_str",
}
}

fn run_as<T: FastFloat + FromLexical + FromLexicalLossy + FromStr>(
fn run_as<T: FastFloat + FromLexical + FromStr>(
&self,
input: &Input,
repeat: usize,
Expand All @@ -147,11 +145,6 @@ impl Method {
.unwrap_or_default()
.0
}),
Self::LexicalLossy => run_bench(data, repeat, |s: &str| {
lexical_core::parse_partial_lossy::<T>(s.as_bytes())
.unwrap_or_default()
.0
}),
Self::FromStr => run_bench(data, repeat, |s: &str| s.parse::<T>().unwrap_or_default()),
};

Expand All @@ -172,12 +165,7 @@ impl Method {
}

pub fn all() -> &'static [Self] {
&[
Method::FastFloat,
Method::Lexical,
Method::LexicalLossy,
Method::FromStr,
]
&[Method::FastFloat, Method::Lexical, Method::FromStr]
}
}

Expand Down
24 changes: 19 additions & 5 deletions src/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ impl Decimal {
pub fn parse_decimal(mut s: &[u8]) -> Decimal {
// can't fail since it follows a call to parse_number
let mut d = Decimal::default();
let start = s;
let c = s.get_first();
d.negative = c == b'-';
if c == b'-' || c == b'+' {
Expand Down Expand Up @@ -217,6 +218,24 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
parse_digits(&mut s, |digit| d.try_add_digit(digit));
d.decimal_point = s.len() as i32 - first.len() as i32;
}
if d.num_digits != 0 {
// Ignore the trailing zeros if there are any
let mut n_trailing_zeros = 0;
for &c in start[..(start.len() - s.len())].iter().rev() {
if c == b'0' {
n_trailing_zeros += 1;
} else if c != b'.' {
break;
}
}
d.decimal_point += n_trailing_zeros as i32;
d.num_digits -= n_trailing_zeros;
d.decimal_point += d.num_digits as i32;
if d.num_digits > Decimal::MAX_DIGITS {
d.truncated = true;
d.num_digits = Decimal::MAX_DIGITS;
}
}
if s.check_first2(b'e', b'E') {
s = s.advance(1);
let mut neg_exp = false;
Expand All @@ -234,11 +253,6 @@ pub fn parse_decimal(mut s: &[u8]) -> Decimal {
});
d.decimal_point += if neg_exp { -exp_num } else { exp_num };
}
d.decimal_point += d.num_digits as i32;
if d.num_digits > Decimal::MAX_DIGITS {
d.truncated = true;
d.num_digits = Decimal::MAX_DIGITS;
}
for i in d.num_digits..Decimal::MAX_DIGITS_WITHOUT_OVERFLOW {
d.digits[i] = 0;
}
Expand Down
20 changes: 10 additions & 10 deletions src/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,30 +68,26 @@ fn try_parse_19digits(s: &mut AsciiStr<'_>, x: &mut u64) {
}

#[inline]
fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) -> usize {
fn try_parse_8digits_le(s: &mut AsciiStr<'_>, x: &mut u64) {
// may cause overflows, to be handled later
let mut count = 0;
if cfg!(target_endian = "little") {
if let Some(v) = s.try_read_u64() {
if is_8digits_le(v) {
*x = x
.wrapping_mul(1_0000_0000)
.wrapping_add(parse_8digits_le(v));
s.step_by(8);
count = 8;
if let Some(v) = s.try_read_u64() {
if is_8digits_le(v) {
*x = x
.wrapping_mul(1_0000_0000)
.wrapping_add(parse_8digits_le(v));
s.step_by(8);
count = 16;
}
}
}
}
}
count
}

#[inline]
Expand Down Expand Up @@ -124,18 +120,22 @@ fn parse_scientific(s: &mut AsciiStr<'_>) -> i64 {

#[inline]
pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> {
// assuming s.len() >= 1
debug_assert!(!s.is_empty());

let mut s = AsciiStr::new(s);
let start = s;

// handle optional +/- sign
let mut negative = false;
if s.first_either(b'-', b'+') {
negative = s.first_is(b'-');
if s.first() == b'-' {
negative = true;
if s.step().is_empty() {
return None;
}
} else if s.first() == b'+' && s.step().is_empty() {
return None;
}
debug_assert!(!s.is_empty());

// parse initial digits before dot
let mut mantissa = 0_u64;
Expand Down Expand Up @@ -171,8 +171,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> {
let len = s.offset_from(&start) as _;

// handle uncommon case with many digits
n_digits -= 19;
if n_digits <= 0 {
if n_digits <= 19 {
return Some((
Number {
exponent,
Expand All @@ -184,6 +183,7 @@ pub fn parse_number(s: &[u8]) -> Option<(Number, usize)> {
));
}

n_digits -= 19;
let mut many_digits = false;
let mut p = digits_start;
while p.check_first_either(b'0', b'.') {
Expand Down
41 changes: 39 additions & 2 deletions tests/test_basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ macro_rules! check {
check!($ty, $s, core::$ty::NEG_INFINITY)
}};
($ty:ident, $s:expr, $e:expr) => {{
let s = $s.as_bytes();
let string = String::from($s);
let s = string.as_bytes();
let expected: $ty = $e;
let result = fast_float::parse::<$ty, _>(s).unwrap();
assert_eq!(result, expected);
let lex = lexical_core::parse::<$ty>(s).unwrap();
assert_eq!(result, lex);
let std = <$ty>::from_str($s);
let std = <$ty>::from_str(string.as_str());
if let Ok(std) = std {
// stdlib can't parse all weird floats
if std.is_finite() && result.is_finite() {
Expand Down Expand Up @@ -84,6 +85,14 @@ macro_rules! check_f64_neg_inf {
};
}

/// Returns an owned copy of `s` with `n` ASCII `'0'` characters appended.
///
/// Used by the tests to build inputs with long runs of trailing zeros.
fn append_zeros(s: impl AsRef<str>, n: usize) -> String {
    let mut padded = s.as_ref().to_owned();
    // Append exactly `n` zero characters after the original text.
    padded.extend(std::iter::repeat('0').take(n));
    padded
}

#[test]
fn test_f64_inf() {
check_f64_inf!("INF");
Expand Down Expand Up @@ -200,6 +209,17 @@ fn test_f64_long() {

#[test]
fn test_f64_general() {
check_f64!("9007199254740993.0", hexf64("0x1.p+53"));
check_f64!(append_zeros("9007199254740993.0", 1000), hexf64("0x1.p+53"));
check_f64!("10000000000000000000", hexf64("0x1.158e460913dp+63"));
check_f64!(
"10000000000000000000000000000001000000000000",
hexf64("0x1.cb2d6f618c879p+142")
);
check_f64!(
"10000000000000000000000000000000000000000001",
hexf64("0x1.cb2d6f618c879p+142")
);
check_f64!(1.1920928955078125e-07);
check_f64!("-0", -0.0);
check_f64!(
Expand Down Expand Up @@ -277,6 +297,23 @@ fn test_f32_inf() {

#[test]
fn test_f32_basic() {
let f1 = "\
1.175494140627517859246175898662808184331245864732796240031385942718174675986064\
7699724722770042717456817626953125";
check_f32!(f1, hexf32("0x1.2ced3p+0"));
check_f32!(format!("{}e-38", f1), hexf32("0x1.fffff8p-127"));
check_f32!(
format!("{}e-38", append_zeros(f1, 655)),
hexf32("0x1.fffff8p-127")
);
check_f32!(
format!("{}e-38", append_zeros(f1, 656)),
hexf32("0x1.fffff8p-127")
);
check_f32!(
format!("{}e-38", append_zeros(f1, 1000)),
hexf32("0x1.fffff8p-127")
);
check_f32!(1.00000006e+09);
check_f32!(1.4012984643e-45);
check_f32!(1.1754942107e-38);
Expand Down
14 changes: 13 additions & 1 deletion tests/test_exhaustive.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#[test]
#[ignore]
fn test_f32_exhaustive() {
fn test_f32_exhaustive_ryu() {
let mut buf = ryu::Buffer::new();
for i in 0..0xFFFF_FFFF_u32 {
let a: f32 = unsafe { core::mem::transmute(i) };
Expand All @@ -9,3 +9,15 @@ fn test_f32_exhaustive() {
assert!(a == b || (a.is_nan() && b.is_nan()));
}
}

/// Round-trips every 32-bit pattern, interpreted as an `f32`, through
/// `lexical_core::write` and `fast_float::parse`, and checks that the parsed
/// value equals the original (or that both are NaN).
///
/// Marked `#[ignore]` so it only runs when explicitly requested.
#[test]
#[ignore]
fn test_f32_exhaustive_lexical() {
    let mut buf = [0; 1024];
    // Inclusive upper bound: a half-open `0..0xFFFF_FFFF` range would never
    // exercise the final bit pattern `0xFFFF_FFFF`.
    for bits in 0..=0xFFFF_FFFF_u32 {
        // `f32::from_bits` is the safe equivalent of transmuting a `u32`.
        let a = f32::from_bits(bits);
        let s = lexical_core::write(a, &mut buf);
        let b: f32 = fast_float::parse(s).unwrap();
        assert!(a == b || (a.is_nan() && b.is_nan()));
    }
}