diff --git a/lib/std/fmt/parse_float.zig b/lib/std/fmt/parse_float.zig index 585c23ed540c..da45fd8f44bc 100644 --- a/lib/std/fmt/parse_float.zig +++ b/lib/std/fmt/parse_float.zig @@ -1,377 +1,195 @@ -// Adapted from https://github.com/grzegorz-kraszewski/stringtofloat. - -// MIT License // -// Copyright (c) 2016 Grzegorz Kraszewski +// Adapted from sqlite3's sqlite3AtoF() +// https://github.com/mackyle/sqlite/blob/ae5d3aa91a794f6f1486b3f453ec44c0aa4c086b/src/util.c#L375-L571 // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// adds support for parsing f128 floats, "nan", "inf" // -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. +// Returns a float if the string is a valid number, or error.InvalidCharacter +// if the string is empty or contains extraneous text. Valid numbers +// are in one of these formats: // -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
+// [+-]digits[E[+-]digits] +// [+-]digits.[digits][E[+-]digits] +// [+-].digits[E[+-]digits] // - -// Be aware that this implementation has the following limitations: +// underscore characters may appear zero or more times between +// digits and are ignored by the parser: 500_000 // -// - Is not round-trip accurate for all values -// - Only supports round-to-zero -// - Does not handle denormals const std = @import("std"); const ascii = std.ascii; -// The mantissa field in FloatRepr is 64bit wide and holds only 19 digits -// without overflowing -const max_digits = 19; - -const f64_plus_zero: u64 = 0x0000000000000000; -const f64_minus_zero: u64 = 0x8000000000000000; -const f64_plus_infinity: u64 = 0x7FF0000000000000; -const f64_minus_infinity: u64 = 0xFFF0000000000000; - -const Z96 = struct { - d0: u32, - d1: u32, - d2: u32, - - // d = s >> 1 - inline fn shiftRight1(d: *Z96, s: Z96) void { - d.d0 = (s.d0 >> 1) | ((s.d1 & 1) << 31); - d.d1 = (s.d1 >> 1) | ((s.d2 & 1) << 31); - d.d2 = s.d2 >> 1; - } +fn caseInEql(a: []const u8, b: []const u8) bool { + if (a.len != b.len) return false; - // d = s << 1 - inline fn shiftLeft1(d: *Z96, s: Z96) void { - d.d2 = (s.d2 << 1) | ((s.d1 & (1 << 31)) >> 31); - d.d1 = (s.d1 << 1) | ((s.d0 & (1 << 31)) >> 31); - d.d0 = s.d0 << 1; + for (a) |_, i| { + if (ascii.toUpper(a[i]) != ascii.toUpper(b[i])) { + return false; + } } - // d += s - inline fn add(d: *Z96, s: Z96) void { - var w = @as(u64, d.d0) + @as(u64, s.d0); - d.d0 = @truncate(u32, w); + return true; +} - w >>= 32; - w += @as(u64, d.d1) + @as(u64, s.d1); - d.d1 = @truncate(u32, w); +pub const ParseFloatError = error{InvalidCharacter}; - w >>= 32; - w += @as(u64, d.d2) + @as(u64, s.d2); - d.d2 = @truncate(u32, w); +// +// Compute 10 to the E-th power. Examples: E==1 results in 10. +// E==2 results in 100. E==50 results in 1.0e50. 
+// +fn pow10(E_arg: u32) f128 { + var E = E_arg; + var x: f128 = 10.0; + var r: f128 = 1.0; + while (true) { + if (E & 1 != 0) r *= x; + E >>= 1; + if (E == 0) break; + x *= x; } + return r; +} - // d -= s - inline fn sub(d: *Z96, s: Z96) void { - var w = @as(u64, d.d0) -% @as(u64, s.d0); - d.d0 = @truncate(u32, w); - - w >>= 32; - w += @as(u64, d.d1) -% @as(u64, s.d1); - d.d1 = @truncate(u32, w); +pub fn parseFloat(comptime T: type, str: []const u8) !T { + if (str.len == 0) return error.InvalidCharacter; - w >>= 32; - w += @as(u64, d.d2) -% @as(u64, s.d2); - d.d2 = @truncate(u32, w); - } -}; - -const FloatRepr = struct { - negative: bool, - exponent: i32, - mantissa: u64, -}; - -fn convertRepr(comptime T: type, n: FloatRepr) T { - const mask28: u32 = 0xf << 28; - - var s: Z96 = undefined; - var q: Z96 = undefined; - var r: Z96 = undefined; - - s.d0 = @truncate(u32, n.mantissa); - s.d1 = @truncate(u32, n.mantissa >> 32); - s.d2 = 0; - - var binary_exponent: i32 = 92; - var exp = n.exponent; - - while (exp > 0) : (exp -= 1) { - q.shiftLeft1(s); // q = p << 1 - r.shiftLeft1(q); // r = p << 2 - s.shiftLeft1(r); // p = p << 3 - s.add(q); // p = (p << 3) + (p << 1) - - while (s.d2 & mask28 != 0) { - q.shiftRight1(s); - binary_exponent += 1; - s = q; - } + if (caseInEql(str, "nan")) { + return std.math.nan(T); + } else if (caseInEql(str, "inf") or caseInEql(str, "+inf")) { + return std.math.inf(T); + } else if (caseInEql(str, "-inf")) { + return -std.math.inf(T); } - while (exp < 0) { - while (s.d2 & (1 << 31) == 0) { - q.shiftLeft1(s); - binary_exponent -= 1; - s = q; - } + var z: usize = 0; // index into str[] + // sign * significand * (10 ^ (esign * exponent)) + var sign: i32 = 1; // sign of significand + var s: i128 = 0; // significand + var d: i32 = 0; // adjust exponent for shifting decimal point + var esign: i32 = 1; // sign of exponent + var e: i32 = 0; // exponent + var nDigits: i32 = 0; + + // get sign of significand + if (str[z] == '-') { + sign = -1; + z += 
1; + } else if (str[z] == '+') { + z += 1; + } - q.d2 = s.d2 / 10; - r.d1 = s.d2 % 10; - r.d2 = (s.d1 >> 8) | (r.d1 << 24); - q.d1 = r.d2 / 10; - r.d1 = r.d2 % 10; - r.d2 = ((s.d1 & 0xff) << 16) | (s.d0 >> 16) | (r.d1 << 24); - r.d0 = r.d2 / 10; - r.d1 = r.d2 % 10; - q.d1 = (q.d1 << 8) | ((r.d0 & 0x00ff0000) >> 16); - q.d0 = r.d0 << 16; - r.d2 = (s.d0 *% 0xffff) | (r.d1 << 16); - q.d0 |= r.d2 / 10; - s = q; - - exp += 1; + // copy max significant digits to significand + while (z < str.len and ((ascii.isDigit(str[z]) and s < ((std.math.maxInt(@TypeOf(s)) - 9) / 10)) or str[z] == '_')) : (z += 1) { + if (str[z] == '_') continue; + s = s * 10 + (str[z] - '0'); + nDigits += 1; } - if (s.d0 != 0 or s.d1 != 0 or s.d2 != 0) { - while (s.d2 & mask28 == 0) { - q.shiftLeft1(s); - binary_exponent -= 1; - s = q; - } + // skip non-significant significand digits + // (increase exponent by d to shift decimal left) + while (z < str.len and (ascii.isDigit(str[z]) or str[z] == '_')) : (z += 1) { + if (str[z] == '_') continue; + nDigits += 1; + d += 1; } - binary_exponent += 1023; - - const repr: u64 = blk: { - if (binary_exponent > 2046) { - break :blk if (n.negative) f64_minus_infinity else f64_plus_infinity; - } else if (binary_exponent < 1) { - break :blk if (n.negative) f64_minus_zero else f64_plus_zero; - } else if (s.d2 != 0) { - const binexs2 = @intCast(u64, binary_exponent) << 52; - const rr = (@as(u64, s.d2 & ~mask28) << 24) | ((@as(u64, s.d1) + 128) >> 8) | binexs2; - break :blk if (n.negative) rr | (1 << 63) else rr; - } else { - break :blk 0; - } - }; + do_atof_calc: { + if (z >= str.len) break :do_atof_calc; - const f = @bitCast(f64, repr); - return @floatCast(T, f); -} + // if decimal point is present + if (str[z] == '.') { + z += 1; -const State = enum { - MaybeSign, - LeadingMantissaZeros, - LeadingFractionalZeros, - MantissaIntegral, - MantissaFractional, - ExponentSign, - LeadingExponentZeros, - Exponent, -}; - -const ParseResult = enum { - Ok, - PlusZero, - 
MinusZero, - PlusInf, - MinusInf, -}; - -fn parseRepr(s: []const u8, n: *FloatRepr) !ParseResult { - var digit_index: usize = 0; - var negative_exp = false; - var exponent: i32 = 0; - - var state = State.MaybeSign; - - var i: usize = 0; - while (i < s.len) { - const c = s[i]; - - switch (state) { - .MaybeSign => { - state = .LeadingMantissaZeros; - - if (c == '+') { - i += 1; - } else if (c == '-') { - n.negative = true; - i += 1; - } else if (ascii.isDigit(c) or c == '.') { - // continue - } else { - return error.InvalidCharacter; - } - }, - .LeadingMantissaZeros => { - if (c == '0') { - i += 1; - } else if (c == '.') { - i += 1; - state = .LeadingFractionalZeros; - } else if (c == '_') { - i += 1; - } else { - state = .MantissaIntegral; - } - }, - .LeadingFractionalZeros => { - if (c == '0') { - i += 1; - if (n.exponent > std.math.minInt(i32)) { - n.exponent -= 1; - } - } else { - state = .MantissaFractional; - } - }, - .MantissaIntegral => { - if (ascii.isDigit(c)) { - if (digit_index < max_digits) { - n.mantissa *%= 10; - n.mantissa += c - '0'; - digit_index += 1; - } else if (n.exponent < std.math.maxInt(i32)) { - n.exponent += 1; - } - - i += 1; - } else if (c == '.') { - i += 1; - state = .MantissaFractional; - } else if (c == '_') { - i += 1; - } else { - state = .MantissaFractional; - } - }, - .MantissaFractional => { - if (ascii.isDigit(c)) { - if (digit_index < max_digits) { - n.mantissa *%= 10; - n.mantissa += c - '0'; - n.exponent -%= 1; - digit_index += 1; - } - - i += 1; - } else if (c == 'e' or c == 'E') { - i += 1; - state = .ExponentSign; - } else if (c == '_') { - i += 1; - } else { - state = .ExponentSign; - } - }, - .ExponentSign => { - if (c == '+') { - i += 1; - } else if (c == '_') { - return error.InvalidCharacter; - } else if (c == '-') { - negative_exp = true; - i += 1; - } + if ((z < str.len and str[z] == '_') or str[z -| 2] == '_') { + return error.InvalidCharacter; + } - state = .LeadingExponentZeros; - }, - .LeadingExponentZeros => { 
- if (c == '0') { - i += 1; - } else if (c == '_') { - i += 1; - } else { - state = .Exponent; + // copy digits from after decimal to significand + // (decrease exponent by d to shift decimal right) + while (z < str.len and (ascii.isDigit(str[z]) or str[z] == '_')) : (z += 1) { + if (str[z] == '_') continue; + if (s < ((std.math.maxInt(@TypeOf(s)) - 9) / 10)) { + s = s * 10 + (str[z] - '0'); + d -= 1; } - }, - .Exponent => { - if (ascii.isDigit(c)) { - if (exponent < std.math.maxInt(i32) / 10) { - exponent *= 10; - exponent += @intCast(i32, c - '0'); - } - - i += 1; - } else if (c == '_') { - i += 1; - } else { - return error.InvalidCharacter; - } - }, + + nDigits += 1; + } } - } + if (z >= str.len) break :do_atof_calc; + + // if exponent is present + if (str[z] == 'e' or str[z] == 'E') { + z += 1; + + if (z >= str.len) return error.InvalidCharacter; // exponent not well formed + + if (str[z -| 2] == '_' or str[z] == '_') { + return error.InvalidCharacter; + } + + // get sign of exponent + if (str[z] == '-') { + esign = -1; + z += 1; + } else if (str[z] == '+') { + z += 1; + } + + // copy digits to exponent + var eValid = false; + while (z < str.len and (ascii.isDigit(str[z]) or str[z] == '_')) : (z += 1) { + if (str[z] == '_') continue; + e = (e *| 10 +| (str[z] - '0')); + eValid = true; + } + + if (!eValid) return error.InvalidCharacter; + } + } // do_atof_calc block - if (negative_exp) exponent = -exponent; - n.exponent += exponent; + if (z != str.len or nDigits == 0) return error.InvalidCharacter; - if (n.mantissa == 0) { - return if (n.negative) .MinusZero else .PlusZero; - } else if (n.exponent > 309) { - return if (n.negative) .MinusInf else .PlusInf; - } else if (n.exponent < -328) { - return if (n.negative) .MinusZero else .PlusZero; + // adjust exponent by d, and update sign + e = (e * esign) + d; + if (e < 0) { + esign = -1; + e *= -1; + } else { + esign = 1; } - return .Ok; -} - -fn caseInEql(a: []const u8, b: []const u8) bool { - if (a.len != b.len) 
return false; - - for (a) |_, i| { - if (ascii.toUpper(a[i]) != ascii.toUpper(b[i])) { - return false; + // Attempt to reduce exponent. + while (e > 0) { + if (esign > 0) { + if (s >= (std.math.maxInt(@TypeOf(s)) / 10)) break; + s *= 10; + } else { + if (@rem(s, 10) != 0) break; + s = @divTrunc(s, 10); } + e -= 1; } - return true; -} - -pub const ParseFloatError = error{InvalidCharacter}; - -pub fn parseFloat(comptime T: type, s: []const u8) ParseFloatError!T { - if (s.len == 0 or (s.len == 1 and (s[0] == '+' or s[0] == '-'))) { - return error.InvalidCharacter; - } - - if (caseInEql(s, "nan")) { - return std.math.nan(T); - } else if (caseInEql(s, "inf") or caseInEql(s, "+inf")) { - return std.math.inf(T); - } else if (caseInEql(s, "-inf")) { - return -std.math.inf(T); + // adjust the sign of significand + s = if (sign < 0) -s else s; + + var result: f128 = 0; + if (s == 0) { + // In the IEEE 754 standard, zero is signed. + result = if (sign < 0) -0.0 else 0.0; + } else if (e == 0) { + result = @intToFloat(f128, s); + } else { + const scale = pow10(@intCast(u32, e)); + if (esign < 0) { + result = @intToFloat(f128, s) / scale; + } else { + result = @intToFloat(f128, s) * scale; + } } - var r = FloatRepr{ - .negative = false, - .exponent = 0, - .mantissa = 0, - }; - - return switch (try parseRepr(s, &r)) { - .Ok => convertRepr(T, r), - .PlusZero => 0.0, - .MinusZero => -@as(T, 0.0), - .PlusInf => std.math.inf(T), - .MinusInf => -std.math.inf(T), - }; + return @floatCast(T, result); } test "fmt.parseFloat" { @@ -386,9 +204,13 @@ test "fmt.parseFloat" { try testing.expectError(error.InvalidCharacter, parseFloat(T, "")); try testing.expectError(error.InvalidCharacter, parseFloat(T, " 1")); - try testing.expectError(error.InvalidCharacter, parseFloat(T, "1abc")); + try testing.expectError(error.InvalidCharacter, parseFloat(T, "1 ")); try testing.expectError(error.InvalidCharacter, parseFloat(T, "+")); try testing.expectError(error.InvalidCharacter, parseFloat(T, "-")); + 
try testing.expectError(error.InvalidCharacter, parseFloat(T, "1_.5e2")); + try testing.expectError(error.InvalidCharacter, parseFloat(T, "1._5e2")); + try testing.expectError(error.InvalidCharacter, parseFloat(T, "1.5_e2")); + try testing.expectError(error.InvalidCharacter, parseFloat(T, "1.5e_2")); try expectEqual(try parseFloat(T, "0"), 0.0); try expectEqual(try parseFloat(T, "0"), 0.0); @@ -405,15 +227,16 @@ test "fmt.parseFloat" { try expect(approxEqAbs(T, try parseFloat(T, "3.141"), 3.141, epsilon)); try expect(approxEqAbs(T, try parseFloat(T, "-3.141"), -3.141, epsilon)); - try expectEqual(try parseFloat(T, "1e-700"), 0); - try expectEqual(try parseFloat(T, "1e+700"), std.math.inf(T)); + // 4933 is the smallest magnitude exponent that causes an f128 to go to infinity and zero + try expectEqual(try parseFloat(T, "1e-4933"), 0); + try expectEqual(try parseFloat(T, "1e+4933"), std.math.inf(T)); try expectEqual(@bitCast(Z, try parseFloat(T, "nAn")), @bitCast(Z, std.math.nan(T))); try expectEqual(try parseFloat(T, "inF"), std.math.inf(T)); try expectEqual(try parseFloat(T, "-INF"), -std.math.inf(T)); try expectEqual(try parseFloat(T, "0.4e0066999999999999999999999999999999999999999999999999999"), std.math.inf(T)); - try expect(approxEqAbs(T, try parseFloat(T, "0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0"), @as(T, 123456.789000e10), epsilon)); + try expect(approxEqAbs(T, try parseFloat(T, "0_1_2_3_4.7_8_9_0_0_0e0_0_2"), @as(T, 1234.789e2), epsilon)); if (T != f16) { try expect(approxEqAbs(T, try parseFloat(T, "1e-2"), 0.01, epsilon)); @@ -425,4 +248,22 @@ test "fmt.parseFloat" { try expect(approxEqAbs(T, try parseFloat(T, "2.71828182845904523536"), @as(T, 2.718281828459045), epsilon)); } } + + // test rounding behavior + try expectEqual(@bitCast(u64, try parseFloat(f64, "144115188075855870")), 0x4380000000000000); // round up to 2^57 (...872) + try expectEqual(@bitCast(u64, try parseFloat(f64, "144115188075855884")), 0x4380000000000000); // round down + try expectEqual(@bitCast(u64, try 
parseFloat(f64, "144115188075855885")), 0x4380000000000000); // round down (halfway point is ...888) + try expectEqual(@bitCast(u64, try parseFloat(f64, "144115188075855886")), 0x4380000000000000); // round down + try expectEqual(@bitCast(u64, try parseFloat(f64, "144115188075855889")), 0x4380000000000001); // round up + try expectEqual(@bitCast(u64, try parseFloat(f64, "144115188075855900")), 0x4380000000000001); // round up (to ...904) + + try expectEqual(@bitCast(u64, try parseFloat(f64, "9007199254740993")), 0x4340000000000000); // rounded down + + // test precision of f128 + try expectEqual(@bitCast(u128, try parseFloat(f128, "9007199254740993")), 0x40340000000000000800000000000000); // exact + + // test range of f128 + // at time of writing (Mar 2021), zig prints f128 values larger than f64 as "inf", + // so I'm not 100% sure this hex literal is the correct parse of 1e4930 + try expectEqual(@bitCast(u128, try parseFloat(f128, "1e4930")), 0x7ff8136c69ce8adff4397b050cae44c7); } diff --git a/src/stage1/parse_f128.c b/src/stage1/parse_f128.c index ea6acc73d0da..fe0b179a7619 100644 --- a/src/stage1/parse_f128.c +++ b/src/stage1/parse_f128.c @@ -713,7 +713,7 @@ static float128_t decfloat(struct MuslFILE *f, int c, int bits, int emin, int si //y += bias; { float128_t new_value; - f128M_add(&y, &frac, &new_value); + f128M_add(&y, &bias, &new_value); y = new_value; } } diff --git a/test/behavior/floatop.zig b/test/behavior/floatop.zig index f8445b6e26cf..6357fbde426b 100644 --- a/test/behavior/floatop.zig +++ b/test/behavior/floatop.zig @@ -677,10 +677,10 @@ fn fnWithFloatMode() f32 { } test "float literal at compile time not lossy" { - if (builtin.zig_backend != .stage1) { - // https://github.com/ziglang/zig/issues/11169 - return error.SkipZigTest; - } + // if (builtin.zig_backend != .stage1) { + // // https://github.com/ziglang/zig/issues/11169 + // return error.SkipZigTest; + // } try expect(16777216.0 + 1.0 == 16777217.0); try expect(9007199254740992.0 + 1.0 == 9007199254740993.0);