Skip to content

Commit

Permalink
Implement fast_float for String#to_f (#15195)
Browse files Browse the repository at this point in the history
This is a source port of https://github.com/fastfloat/fast_float, which is both locale-independent and platform-independent, meaning the special float values will work on MSYS2's MINGW64 environment too, as we are not calling `LibC.strtod` anymore. Additionally, non-ASCII whitespace characters are now stripped, just like `#to_i`.

**The current implementation doesn't accept hexfloats.**

This implementation brings a roughly 3x speedup, without any additional allocations.
  • Loading branch information
HertzDevil authored Dec 21, 2024
1 parent b6b190f commit c5455ce
Show file tree
Hide file tree
Showing 14 changed files with 2,859 additions and 62 deletions.
27 changes: 27 additions & 0 deletions spec/manual/string_to_f32_spec.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
require "spec"

# Exhaustively checks that for all 4294967296 possible `Float32` values,
# `to_s.to_f32` returns the original number. Splits the floats into 4096 bins
# for better progress tracking. Also useful as a sort of benchmark.
#
# This was originally added when `String#to_f` moved from `LibC.strtod` to
# `fast_float`, but is applicable to any other implementation as well.
describe "x.to_s.to_f32 == x" do
  # One example per bin: the bin index supplies the upper 12 bits of the
  # `Float32` bit pattern, the inner loop enumerates the lower 20 bits.
  (0_u32..0xFFF_u32).each do |bin|
    it "%03x00000..%03xfffff" % {bin, bin} do
      0x100000.times do |offset|
        original = (bin << 20 | offset).unsafe_as(Float32)
        parsed = original.to_s.to_f32?.should_not be_nil

        if original.nan?
          # NaN never compares equal to itself, so only check NaN-ness.
          parsed.nan?.should be_true
        else
          parsed.should eq(original)
          # `eq` treats 0.0 and -0.0 as equal, so compare the signs as well.
          Math.copysign(1, parsed).should eq(Math.copysign(1, original))
        end
      end
    end
  end
end
103 changes: 103 additions & 0 deletions spec/manual/string_to_f_supplemental_spec.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Runs the fast_float supplemental test suite:
# https://github.com/fastfloat/supplemental_test_files
#
# Supplemental data files for testing floating parsing (credit: Nigel Tao for
# the data)
#
# LICENSE file (Apache 2): https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/LICENSE
#
# Due to the sheer volume of the test cases (5.2+ million test cases across
# 270+ MB of text) these specs are not vendored into the Crystal repository.

require "spec"
require "http/client"
require "../support/number"
require "wait_group"

# these specs permit underflow and overflow to return 0 and infinity
# respectively (when `ret.ec == Errno::ERANGE`), so we have to use
# `Float::FastFloat` directly
# Parses all bytes of *str* as a `Float32` by calling `Float::FastFloat`
# directly and returns the value written by the parser.
#
# The error code must be either `Errno::NONE` or `Errno::ERANGE`; any other
# code fails the surrounding spec.
def fast_float_to_f32(str)
  first = str.to_unsafe
  last = first + str.bytesize
  opts = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)

  # The parser writes its result through the pointer passed below.
  result = uninitialized Float32
  status = Float::FastFloat::BinaryFormat_Float32.new.from_chars_advanced(first, last, pointerof(result), opts)

  {Errno::NONE, Errno::ERANGE}.should contain(status.ec)
  result
end

# Parses all bytes of *str* as a `Float64` by calling `Float::FastFloat`
# directly and returns the value written by the parser.
#
# The error code must be either `Errno::NONE` or `Errno::ERANGE`; any other
# code fails the surrounding spec.
def fast_float_to_f64(str)
  first = str.to_unsafe
  last = first + str.bytesize
  opts = Float::FastFloat::ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)

  # The parser writes its result through the pointer passed below.
  result = uninitialized Float64
  status = Float::FastFloat::BinaryFormat_Float64.new.from_chars_advanced(first, last, pointerof(result), opts)

  {Errno::NONE, Errno::ERANGE}.should contain(status.ec)
  result
end

# Base URL of the supplemental test data, pinned to a fixed commit so the
# downloaded files do not change between runs. Each entry of `TEST_SUITES`
# is appended to this URL to form the download location.
RAW_BASE_URL = "https://raw.githubusercontent.com/fastfloat/supplemental_test_files/7cc512a7c60361ebe1baf54991d7905efdc62aa0/data/" # @1.0.0

# File names of the test suites to download, relative to `RAW_BASE_URL`.
# Each name is also used as the key of the suite in the cache and as the
# description of the suite's spec group.
TEST_SUITES = %w(
freetype-2-7.txt
google-double-conversion.txt
google-wuffs.txt
ibm-fpgen.txt
lemire-fast-double-parser.txt
lemire-fast-float.txt
more-test-cases.txt
remyoudompheng-fptest-0.txt
remyoudompheng-fptest-1.txt
remyoudompheng-fptest-2.txt
remyoudompheng-fptest-3.txt
tencent-rapidjson.txt
ulfjack-ryu.txt
)

# Maps each suite file name to its parsed test cases. Every test case is a
# `{f32_bits, f64_bits, str}` tuple: the expected bit patterns of the
# `Float32` and `Float64` results, followed by the string to parse.
test_suite_cache = {} of String => Array({UInt32, UInt64, String})
puts "Fetching #{TEST_SUITES.size} test suites"

# All suites are downloaded concurrently, one fiber per suite; the wait group
# is awaited before the total is printed below.
WaitGroup.wait do |wg|
  TEST_SUITES.each do |suite|
    wg.spawn do
      url = RAW_BASE_URL + suite

      cache = HTTP::Client.get(url) do |res|
        # Fail with a clear message on an unsuccessful response instead of
        # trying to parse an error page as test data.
        raise "Failed to fetch #{url}: HTTP #{res.status_code}" unless res.success?

        res.body_io.each_line.map do |line|
          # Each line holds 4 space-separated fields; the first 3 are
          # hexadecimal bit patterns, the last one is the input string.
          args = line.split(' ')
          raise "BUG: should have 4 args" unless args.size == 4

          # f16_bits = args[0].to_u16(16)
          f32_bits = args[1].to_u32(16)
          f64_bits = args[2].to_u64(16)
          str = args[3]

          {f32_bits, f64_bits, str}
        end.to_a
      end

      puts "#{cache.size} test cases cached from #{url}"
      test_suite_cache[suite] = cache
    end
  end
end
puts "There are a total of #{test_suite_cache.sum(&.last.size)} test cases"

# Checks every cached test case against `Float::FastFloat` under each
# hardware rounding mode: the parsed `Float32` / `Float64` bit patterns must
# match the expected bits from the data files exactly.
describe String do
  describe "#to_f" do
    test_suite_cache.each do |suite, cache|
      describe suite do
        each_hardware_rounding_mode do |mode, mode_name|
          it mode_name do
            # `it` only registers the example; its body runs later, after
            # `each_hardware_rounding_mode` has already restored the previous
            # rounding mode. The mode therefore has to be applied inside the
            # example body for the checks to actually run under it.
            with_hardware_rounding_mode(mode) do
              cache.each do |f32_bits, f64_bits, str|
                fast_float_to_f32(str).unsafe_as(UInt32).should eq(f32_bits)
                fast_float_to_f64(str).unsafe_as(UInt64).should eq(f64_bits)
              end
            end
          end
        end
      end
    end
  end
end
4 changes: 4 additions & 0 deletions spec/std/string_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ describe "String" do
it { "1Y2P0IJ32E8E7".to_i64(36).should eq(9223372036854775807) }
end

# more specs are available in `spec/manual/string_to_f_supplemental_spec.cr`
it "does to_f" do
expect_raises(ArgumentError) { "".to_f }
"".to_f?.should be_nil
Expand All @@ -503,6 +504,7 @@ describe "String" do
" 1234.56 ".to_f?(whitespace: false).should be_nil
expect_raises(ArgumentError) { " 1234.56foo".to_f }
" 1234.56foo".to_f?.should be_nil
"\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f.should eq(1234.56_f64)
"123.45 x".to_f64(strict: false).should eq(123.45_f64)
expect_raises(ArgumentError) { "x1.2".to_f64 }
"x1.2".to_f64?.should be_nil
Expand Down Expand Up @@ -547,6 +549,7 @@ describe "String" do
" 1234.56 ".to_f32?(whitespace: false).should be_nil
expect_raises(ArgumentError) { " 1234.56foo".to_f32 }
" 1234.56foo".to_f32?.should be_nil
"\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f32.should eq(1234.56_f32)
"123.45 x".to_f32(strict: false).should eq(123.45_f32)
expect_raises(ArgumentError) { "x1.2".to_f32 }
"x1.2".to_f32?.should be_nil
Expand Down Expand Up @@ -590,6 +593,7 @@ describe "String" do
" 1234.56 ".to_f64?(whitespace: false).should be_nil
expect_raises(ArgumentError) { " 1234.56foo".to_f64 }
" 1234.56foo".to_f64?.should be_nil
"\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f64.should eq(1234.56_f64)
"123.45 x".to_f64(strict: false).should eq(123.45_f64)
expect_raises(ArgumentError) { "x1.2".to_f64 }
"x1.2".to_f64?.should be_nil
Expand Down
32 changes: 32 additions & 0 deletions spec/support/number.cr
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,35 @@ macro hexfloat(str)
::Float64.parse_hexfloat({{ str }})
{% end %}
end

# See also: https://github.com/crystal-lang/crystal/issues/15192
# Minimal bindings to the C floating-point environment functions, used by the
# rounding mode helpers in this file.
lib LibC
{% if flag?(:win32) %}
# Rounding mode constants used when the `win32` flag is set.
FE_TONEAREST = 0x00000000
FE_DOWNWARD = 0x00000100
FE_UPWARD = 0x00000200
FE_TOWARDZERO = 0x00000300
{% else %}
# Rounding mode constants used for every other target.
# NOTE(review): these look like the x86 / x86-64 values; other architectures
# may define different values in their `<fenv.h>` - confirm before relying on
# them there.
FE_TONEAREST = 0x00000000
FE_DOWNWARD = 0x00000400
FE_UPWARD = 0x00000800
FE_TOWARDZERO = 0x00000C00
{% end %}

# Returns the rounding mode currently in effect.
fun fegetround : Int
# Sets the rounding mode to *round*. The return value is ignored by the
# callers in this file (presumably a status code - verify against `<fenv.h>`).
fun fesetround(round : Int) : Int
end

# Switches the hardware rounding mode to *mode*, yields to the block, and
# restores the previously active mode afterwards, even if the block raises.
# Returns the value of the block.
def with_hardware_rounding_mode(mode, &)
  previous_mode = LibC.fegetround
  LibC.fesetround(mode)

  begin
    yield
  ensure
    LibC.fesetround(previous_mode)
  end
end

# Yields once for each of the four rounding modes declared in `LibC`, passing
# the mode constant and its name as a string. Each yield happens while that
# mode is active; the previous mode is restored as soon as the block returns,
# so anything the block merely schedules to run later will not see the mode.
def each_hardware_rounding_mode(&)
  {% for constant in %w(FE_TONEAREST FE_DOWNWARD FE_UPWARD FE_TOWARDZERO) %}
    with_hardware_rounding_mode(LibC::{{ constant.id }}) do
      yield LibC::{{ constant.id }}, {{ constant }}
    end
  {% end %}
end
75 changes: 75 additions & 0 deletions src/float/fast_float.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
struct Float
  # :nodoc:
  # Source port of the floating-point part of fast_float for C++:
  # https://github.com/fastfloat/fast_float
  #
  # fast_float implements the C++17 `std::from_chars`, which accepts a subset of
  # the C `strtod` / `strtof`'s string format:
  #
  # - a leading plus sign is disallowed, but both fast_float and this port
  #   accept it;
  # - the exponent may be required or disallowed, depending on the format
  #   argument (this port always allows both);
  # - hexfloats are not enabled by default, and fast_float doesn't implement it;
  #   (https://github.com/fastfloat/fast_float/issues/124)
  # - hexfloats cannot start with `0x` or `0X`.
  #
  # The following is their license:
  #
  #   Licensed under either of Apache License, Version 2.0 or MIT license or
  #   BOOST license.
  #
  #   Unless you explicitly state otherwise, any contribution intentionally
  #   submitted for inclusion in this repository by you, as defined in the
  #   Apache-2.0 license, shall be triple licensed as above, without any
  #   additional terms or conditions.
  #
  # Main differences from the original fast_float:
  #
  # - Only `UC == UInt8` is implemented and tested, not the other wide chars;
  # - No explicit SIMD (the original mainly uses this for wide char strings).
  #
  # The following compile-time configuration is assumed:
  #
  # - #define FASTFLOAT_ALLOWS_LEADING_PLUS
  # - #define FLT_EVAL_METHOD 0
  module FastFloat
    # Current revision: https://github.com/fastfloat/fast_float/tree/v6.1.6

    # Parses *str* as a `Float64`, returning `nil` when parsing fails.
    #
    # If *whitespace* is true, the parsed byte range is first narrowed at both
    # ends by `str.calc_excess_left` and `str.calc_excess_right` (presumably
    # the sizes of the leading and trailing whitespace). If *strict* is true,
    # the parser must stop exactly at the end of that range; otherwise bytes
    # after the parsed number are ignored. Any error code other than
    # `Errno::NONE` results in `nil`.
    def self.to_f64?(str : String, whitespace : Bool, strict : Bool) : Float64?
      first = str.to_unsafe
      last = first + str.bytesize
      if whitespace
        first += str.calc_excess_left
        last -= str.calc_excess_right
      end

      options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
      # The parser writes its result through the pointer passed below.
      parsed = uninitialized Float64
      result = BinaryFormat_Float64.new.from_chars_advanced(first, last, pointerof(parsed), options)

      return nil unless result.ec == Errno::NONE
      return nil if strict && result.ptr != last
      parsed
    end

    # Parses *str* as a `Float32`, returning `nil` when parsing fails.
    #
    # If *whitespace* is true, the parsed byte range is first narrowed at both
    # ends by `str.calc_excess_left` and `str.calc_excess_right` (presumably
    # the sizes of the leading and trailing whitespace). If *strict* is true,
    # the parser must stop exactly at the end of that range; otherwise bytes
    # after the parsed number are ignored. Any error code other than
    # `Errno::NONE` results in `nil`.
    def self.to_f32?(str : String, whitespace : Bool, strict : Bool) : Float32?
      first = str.to_unsafe
      last = first + str.bytesize
      if whitespace
        first += str.calc_excess_left
        last -= str.calc_excess_right
      end

      options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general)
      # The parser writes its result through the pointer passed below.
      parsed = uninitialized Float32
      result = BinaryFormat_Float32.new.from_chars_advanced(first, last, pointerof(parsed), options)

      return nil unless result.ec == Errno::NONE
      return nil if strict && result.ptr != last
      parsed
    end
  end
end

require "./fast_float/parse_number"
Loading

0 comments on commit c5455ce

Please sign in to comment.