-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This is a source port of https://github.com/fastfloat/fast_float, which is both locale-independent and platform-independent, meaning the special float values will work on MSYS2's MINGW64 environment too, as we are not calling `LibC.strtod` anymore. Additionally, non-ASCII whitespace characters are now stripped, just like `#to_i`. **The current implementation doesn't accept hexfloats.** This implementation brings a roughly 3x speedup, without any additional allocations.
- Loading branch information
1 parent
b6b190f
commit c5455ce
Showing
14 changed files
with
2,859 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
require "spec" | ||
|
||
# Round-trip check over every one of the 4294967296 possible `Float32` bit
# patterns: converting a float to a string and parsing it back with `to_f32?`
# must reproduce the original value. The patterns are split into 4096 examples
# of 0x100000 values each for better progress tracking. Also useful as a sort
# of benchmark.
#
# This was originally added when `String#to_f` moved from `LibC.strtod` to
# `fast_float`, but is applicable to any other implementation as well.
describe "x.to_s.to_f32 == x" do
  (0_u32..0xFFF_u32).each do |high|
    it "%03x00000..%03xfffff" % {high, high} do
      0x100000.times do |low|
        # The upper 12 bits select the bin, the lower 20 bits walk through it.
        original = (high << 20 | low).unsafe_as(Float32)
        parsed = original.to_s.to_f32?.should_not be_nil

        if original.nan?
          # NaN does not compare equal to itself, so only check that a NaN
          # came back.
          parsed.nan?.should be_true
        else
          parsed.should eq(original)
          # Equality treats -0.0 and 0.0 as the same value, so the sign is
          # compared separately.
          Math.copysign(1, parsed).should eq(Math.copysign(1, original))
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# Runs the fast_float supplemental test suite: | ||
# https://github.com/fastfloat/supplemental_test_files | ||
# | ||
# Supplemental data files for testing floating parsing (credit: Nigel Tao for | ||
# the data) | ||
# | ||
# LICENSE file (Apache 2): https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/LICENSE | ||
# | ||
# Due to the sheer volume of the test cases (5.2+ million test cases across | ||
# 270+ MB of text) these specs are not vendored into the Crystal repository. | ||
|
||
require "spec" | ||
require "http/client" | ||
require "../support/number" | ||
require "wait_group" | ||
|
||
# These specs permit underflow and overflow to return 0 and infinity
# respectively (when `ret.ec == Errno::ERANGE`), so we have to use
# `Float::FastFloat` directly.
#
# Parses all bytes of *str* as a `Float32` and returns the parsed value. The
# current example fails if the parser reports anything other than
# `Errno::NONE` or `Errno::ERANGE`.
def fast_float_to_f32(str)
  first = str.to_unsafe
  stop = first + str.bytesize
  opts = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)

  # The parser receives a pointer to this slot and is expected to store the
  # result there.
  parsed = uninitialized Float32
  outcome = Float::FastFloat::BinaryFormat_Float32.new.from_chars_advanced(first, stop, pointerof(parsed), opts)
  {Errno::NONE, Errno::ERANGE}.should contain(outcome.ec)
  parsed
end
|
||
# Parses all bytes of *str* as a `Float64` by calling `Float::FastFloat`
# directly and returns the parsed value. The current example fails if the
# parser reports anything other than `Errno::NONE` or `Errno::ERANGE`.
def fast_float_to_f64(str)
  first = str.to_unsafe
  stop = first + str.bytesize
  opts = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)

  # The parser receives a pointer to this slot and is expected to store the
  # result there.
  parsed = uninitialized Float64
  outcome = Float::FastFloat::BinaryFormat_Float64.new.from_chars_advanced(first, stop, pointerof(parsed), opts)
  {Errno::NONE, Errno::ERANGE}.should contain(outcome.ec)
  parsed
end
|
||
# Base URL of the raw data files; the path contains a fixed commit hash of the
# supplemental_test_files repository.
RAW_BASE_URL = "https://raw.githubusercontent.com/fastfloat/supplemental_test_files/7cc512a7c60361ebe1baf54991d7905efdc62aa0/data/" # @1.0.0

# File names, relative to `RAW_BASE_URL`, of the test suites to download.
TEST_SUITES = [
  "freetype-2-7.txt",
  "google-double-conversion.txt",
  "google-wuffs.txt",
  "ibm-fpgen.txt",
  "lemire-fast-double-parser.txt",
  "lemire-fast-float.txt",
  "more-test-cases.txt",
  "remyoudompheng-fptest-0.txt",
  "remyoudompheng-fptest-1.txt",
  "remyoudompheng-fptest-2.txt",
  "remyoudompheng-fptest-3.txt",
  "tencent-rapidjson.txt",
  "ulfjack-ryu.txt",
]
|
||
# Parsed test cases per suite file name. Each case holds the expected `Float32`
# bits, the expected `Float64` bits, and the input string to parse.
test_suite_cache = {} of String => Array({UInt32, UInt64, String})
puts "Fetching #{TEST_SUITES.size} test suites"

# Download and parse every suite in its own fiber, then wait for all of them.
WaitGroup.wait do |wg|
  TEST_SUITES.each do |suite|
    wg.spawn do
      url = RAW_BASE_URL + suite

      cache = HTTP::Client.get(url) do |res|
        # An error page would otherwise surface as the misleading
        # "BUG: should have 4 args" below, so report the HTTP failure itself.
        raise "Failed to fetch #{url}: HTTP #{res.status_code}" unless res.success?

        res.body_io.each_line.map do |line|
          # Every line has four space-separated fields; the first three are
          # hexadecimal bit patterns and the last one is the input string.
          args = line.split(' ')
          raise "BUG: should have 4 args" unless args.size == 4

          # f16_bits = args[0].to_u16(16)
          f32_bits = args[1].to_u32(16)
          f64_bits = args[2].to_u64(16)
          str = args[3]

          {f32_bits, f64_bits, str}
        end.to_a # materialize while the response body is still open
      end

      puts "#{cache.size} test cases cached from #{url}"
      test_suite_cache[suite] = cache
    end
  end
end
puts "There are a total of #{test_suite_cache.sum(&.last.size)} test cases"
|
||
# Checks every downloaded test case against its expected `Float32` and
# `Float64` bit patterns, with one example per suite and rounding mode name.
describe String do
  describe "#to_f" do
    test_suite_cache.each do |suite, cases|
      describe suite do
        # NOTE(review): `it` is called inside the rounding-mode block. If the
        # spec runner defers example bodies, the mode may no longer be in
        # effect when the assertions run -- confirm against the definition of
        # `each_hardware_rounding_mode` in "../support/number".
        each_hardware_rounding_mode do |_mode, mode_name|
          it mode_name do
            cases.each do |expected_f32, expected_f64, input|
              fast_float_to_f32(input).unsafe_as(UInt32).should eq(expected_f32)
              fast_float_to_f64(input).unsafe_as(UInt64).should eq(expected_f64)
            end
          end
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
struct Float
  # :nodoc:
  # Source port of the floating-point part of fast_float for C++:
  # https://github.com/fastfloat/fast_float
  #
  # fast_float implements the C++17 `std::from_chars`, which accepts a subset of
  # the C `strtod` / `strtof`'s string format:
  #
  # - a leading plus sign is disallowed, but both fast_float and this port
  #   accept it;
  # - the exponent may be required or disallowed, depending on the format
  #   argument (this port always allows both);
  # - hexfloats are not enabled by default, and fast_float doesn't implement
  #   them; (https://github.com/fastfloat/fast_float/issues/124)
  # - hexfloats cannot start with `0x` or `0X`.
  #
  # The following is their license:
  #
  #   Licensed under either of Apache License, Version 2.0 or MIT license or
  #   BOOST license.
  #
  #   Unless you explicitly state otherwise, any contribution intentionally
  #   submitted for inclusion in this repository by you, as defined in the
  #   Apache-2.0 license, shall be triple licensed as above, without any
  #   additional terms or conditions.
  #
  # Main differences from the original fast_float:
  #
  # - Only `UC == UInt8` is implemented and tested, not the other wide chars;
  # - No explicit SIMD (the original mainly uses this for wide char strings).
  #
  # The following compile-time configuration is assumed:
  #
  # - #define FASTFLOAT_ALLOWS_LEADING_PLUS
  # - #define FLT_EVAL_METHOD 0
  module FastFloat
    # Current revision: https://github.com/fastfloat/fast_float/tree/v6.1.6

    # Parses *str* as a `Float64`.
    #
    # If *whitespace* is true, the byte range handed to the parser is narrowed
    # by `str.calc_excess_left` and `str.calc_excess_right`. If *strict* is
    # true, the parser must stop exactly at the end of that range.
    #
    # Returns `nil` when the parser reports an error or, in strict mode, when
    # it stops before the end of the range.
    def self.to_f64?(str : String, whitespace : Bool, strict : Bool) : Float64?
      first = str.to_unsafe
      stop = first + str.bytesize
      if whitespace
        first += str.calc_excess_left
        stop -= str.calc_excess_right
      end

      opts = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
      # The parser receives a pointer to this slot for its result.
      parsed = uninitialized Float64
      outcome = BinaryFormat_Float64.new.from_chars_advanced(first, stop, pointerof(parsed), opts)

      return nil unless outcome.ec == Errno::NONE
      return nil if strict && outcome.ptr != stop
      parsed
    end

    # Parses *str* as a `Float32`.
    #
    # If *whitespace* is true, the byte range handed to the parser is narrowed
    # by `str.calc_excess_left` and `str.calc_excess_right`. If *strict* is
    # true, the parser must stop exactly at the end of that range.
    #
    # Returns `nil` when the parser reports an error or, in strict mode, when
    # it stops before the end of the range.
    def self.to_f32?(str : String, whitespace : Bool, strict : Bool) : Float32?
      first = str.to_unsafe
      stop = first + str.bytesize
      if whitespace
        first += str.calc_excess_left
        stop -= str.calc_excess_right
      end

      opts = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
      # The parser receives a pointer to this slot for its result.
      parsed = uninitialized Float32
      outcome = BinaryFormat_Float32.new.from_chars_advanced(first, stop, pointerof(parsed), opts)

      return nil unless outcome.ec == Errno::NONE
      return nil if strict && outcome.ptr != stop
      parsed
    end
  end
end
|
||
require "./fast_float/parse_number" |
Oops, something went wrong.