Skip to content

Commit

Permalink
Change how floating point values are generated
Browse files Browse the repository at this point in the history
In order to avoid strange cases and improve floating point randomness the way
that floating point values are generated and scaled to a custom range has
been improved.
  • Loading branch information
lehins committed Dec 25, 2024
1 parent 1859217 commit 89b17ed
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 47 deletions.
2 changes: 1 addition & 1 deletion src/System/Random.hs
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ class Random a where
-- independently:
--
-- >>> fst $ randomR (('a', 5.0), ('z', 10.0)) $ mkStdGen 26
-- ('z',7.27305019146949)
-- ('z',5.22694980853051)
--
-- In case when a lawful range is desired `uniformR` should be used
-- instead.
Expand Down
54 changes: 47 additions & 7 deletions src/System/Random/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ module System.Random.Internal
, shuffleListM
, isInRangeOrd
, isInRangeEnum
, scaleFloating

-- * Generators for sequences of pseudo-random bytes
, uniformByteStringM
Expand Down Expand Up @@ -1394,13 +1395,16 @@ instance UniformRange Double where
| l == h = return l
| isInfinite l || isInfinite h =
-- Optimisation exploiting absorption:
-- (-Infinity) + (anything but +Infinity) = -Infinity
-- (anything but -Infinity) + (+Infinity) = +Infinity
-- (-Infinity) + (+Infinity) = NaN
-- (+Infinity) + (-Infinity) = NaN
-- (-Infinity) + (+Infinity) = NaN
-- (+Infinity) + _ = +Infinity
-- (-Infinity) + _ = -Infinity
-- _ + (+Infinity) = +Infinity
-- _ + (-Infinity) = -Infinity
return $! h + l
| otherwise = do
x <- uniformDouble01M g
return $! x * l + (1 - x) * h
w64 <- uniformWord64 g
pure $! scaleFloating l h w64
{-# INLINE uniformRM #-}
isInRange = isInRangeOrd

Expand Down Expand Up @@ -1437,13 +1441,49 @@ instance UniformRange Float where
uniformRM (l, h) g
| l == h = return l
| isInfinite l || isInfinite h =
-- Optimisation exploiting absorption:
-- (+Infinity) + (-Infinity) = NaN
-- (-Infinity) + (+Infinity) = NaN
-- (+Infinity) + _ = +Infinity
-- (-Infinity) + _ = -Infinity
-- _ + (+Infinity) = +Infinity
-- _ + (-Infinity) = -Infinity
return $! h + l
| otherwise = do
x <- uniformFloat01M g
return $! x * l + (1 - x) * h
w32 <- uniformWord32 g
pure $! scaleFloating l h w32
{-# INLINE uniformRM #-}
isInRange = isInRangeOrd

-- | This is the function that is used to scale a floating point values from random word range to
-- the custom @[low, high]@ range.
--
-- @since 1.3.0
scaleFloating ::
forall a w. (Ord a, Fractional a, RealFloat a, Integral w, Bounded w, FiniteBits w)
=> a
-- ^ Low
-> a
-- ^ High
-> w
-- ^ Uniformly distributed unsigned integral value that will be used for converting to a floating
-- point value and subsequent scaling to the specified range
-> a
scaleFloating l h w =
if isInfinite diff
then let !x = fromIntegral w / m
!y = x * l + (1 - x) * h
in max (min y (max l h)) (min l h)
else let !topMostBit = finiteBitSize w - 1
!x = fromIntegral (clearBit w topMostBit) / m
in if testBit w topMostBit
then l + diff * x
else h + negate diff * x
where
!diff = h - l
!m = fromIntegral (maxBound :: w) :: a
{-# INLINE scaleFloating #-}

-- | Generates uniformly distributed 'Float' in the range \([0, 1]\).
-- Numbers are generated by generating uniform 'Word32' and dividing
-- it by \(2^{32}\). It's used to implement 'UniformRange' instance for 'Float'.
Expand Down
96 changes: 57 additions & 39 deletions src/System/Random/Stateful.hs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ module System.Random.Stateful
-- $implemenstatefulegen

-- ** Floating point number caveats #fpcaveats#
, scaleFloating
-- $floating

-- * References
Expand Down Expand Up @@ -770,80 +771,97 @@ applyTGen f (TGenM tvar) = do

-- $floating
--
-- Due to rounding errors, floating point operations are neither associative nor
-- distributive the way the corresponding operations on real numbers are. Additionally,
-- floating point numbers admit special values @NaN@ as well as negative and positive
-- infinity.
--
-- The 'UniformRange' instances for 'Float' and 'Double' use the following
-- procedure to generate a random value in a range for @uniformRM (a, b) g@:
-- procedure to generate a random value in a range for @uniformRM (l, h) g@:
--
-- * If @__l == h__@, return: @__l__@.
-- * If @__`isInfinite` l == True__@ or @__`isInfinite` h == True__@, return: @__l + h__@
-- * Otherwise:
--
-- 1. Generate an unsigned integral of matching width @__w__@ uniformly.
--
-- If \(a = b\), return \(a\). Otherwise:
-- 2. Check whether @__h - l__@ overflows to infinity and if does then convert @__w__@ to a floating
-- point number in @__[0.0, 1.0]__@ range through division of @__w__@ by the highest possible value:
--
-- 1. Generate \(x\) uniformly such that \(0 \leq x \leq 1\).
-- @
-- x = `fromIntegral` w / `fromIntegral` `maxBound`
-- @
--
-- The method by which \(x\) is sampled does not cover all representable
-- floating point numbers in the unit interval. The method never generates
-- denormal floating point numbers, for example.
-- Then we scale and clamp it before returning it:
--
-- 2. Return \(x \cdot a + (1 - x) \cdot b\).
-- @
-- `max` (`min` (x * l + (1 - x) * h) (`max` l h)) (`min` l h)
-- @
--
-- Due to rounding errors, floating point operations are neither
-- associative nor distributive the way the corresponding operations on
-- real numbers are. Additionally, floating point numbers admit special
-- values @NaN@ as well as negative and positive infinity.
-- Clamping is necessary, because otherwise it would be possible to run into a
-- degenerate case when a scaled value is outside the specified range due to
-- rounding errors.
--
-- For pathological values, step 2 can yield surprising results.
-- 3. Whenever @__h - l__@ does not overflow, we use this common formula ofor scaling: @__ l + (h - l) * x__@.
-- However, instead of using @__[0.0, 1.0]__@ range we use the top most bit of @__w__@ to decide whether
-- we will treat the generated floating point value as @__[0.0, 0.5]__@ range or @__[0.5, 1.0]__@ range and use
-- the left over bits to produce a floating point value in the half unit range:
--
-- * The result may be greater than @max a b@.
-- @
-- x = `fromIntegral` (`clearBit` w 31) / `fromIntegral` `maxBound`
-- @
--
-- >>> :{
-- let (a, b, x) = (-2.13238e-29, -2.1323799e-29, 0.27736077)
-- result = x * a + (1 - x) * b :: Float
-- in (result, result > max a b)
-- :}
-- (-2.1323797e-29,True)
-- Further scaling depends on the top most bit:
--
-- * The result may be smaller than @min a b@.
-- @
-- if `testBit` w 31
-- then l + (h - l) * x
-- else h + (l - h) * x
-- @
--
-- >>> :{
-- let (a, b, x) = (-1.9087862, -1.908786, 0.4228573)
-- result = x * a + (1 - x) * b :: Float
-- in (result, result < min a b)
-- :}
-- (-1.9087863,True)
-- Because of this clever technique the result does not need clamping, since
-- scaled values are guaranteed to stay within the specified range.
--
-- What happens when @NaN@ or @Infinity@ are given to 'uniformRM'? We first
--
-- What happens when @__NaN__@ or @__Infinity__@ are given to 'uniformRM'? We first
-- define them as constants:
--
-- >>> nan = read "NaN" :: Float
-- >>> inf = read "Infinity" :: Float
-- >>> g <- newIOGenM (mkStdGen 2024)
--
-- * If at least one of \(a\) or \(b\) is @NaN@, the result is @NaN@.
-- * If at least one of \(l\) or \(h\) is @__NaN__@, the result is @__NaN__@.
--
-- >>> let (a, b, x) = (nan, 1, 0.5) in x * a + (1 - x) * b
-- >>> uniformRM (nan, 1) g
-- NaN
-- >>> let (a, b, x) = (-1, nan, 0.5) in x * a + (1 - x) * b
-- >>> uniformRM (-1, nan) g
-- NaN
--
-- * If \(a\) is @-Infinity@ and \(b\) is @Infinity@, the result is @NaN@.
-- * If \(l\) and \(h\) are both @__Infinity__@ with opposing signes, then the result is @__NaN__@.
--
-- >>> let (a, b, x) = (-inf, inf, 0.5) in x * a + (1 - x) * b
-- >>> uniformRM (-inf, inf) g
-- NaN
-- >>> uniformRM (inf, -inf) g
-- NaN
--
-- * Otherwise, if \(a\) is @Infinity@ or @-Infinity@, the result is \(a\).
-- * Otherwise, if \(l\) is @__Infinity__@ or @__-Infinity__@, the result is \(l\).
--
-- >>> let (a, b, x) = (inf, 1, 0.5) in x * a + (1 - x) * b
-- >>> uniformRM (inf, 1) g
-- Infinity
-- >>> let (a, b, x) = (-inf, 1, 0.5) in x * a + (1 - x) * b
-- >>> uniformRM (-inf, 1) g
-- -Infinity
--
-- * Otherwise, if \(b\) is @Infinity@ or @-Infinity@, the result is \(b\).
-- * Otherwise, if \(h\) is @__Infinity__@ or @__-Infinity__@, the result is \(h\).
--
-- >>> let (a, b, x) = (1, inf, 0.5) in x * a + (1 - x) * b
-- >>> uniformRM (1, inf) g
-- Infinity
-- >>> let (a, b, x) = (1, -inf, 0.5) in x * a + (1 - x) * b
-- >>> uniformRM (1, -inf) g
-- -Infinity
--
-- Note that the [GCC 10.1.0 C++ standard library](https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libstdc%2B%2B-v3/include/bits/random.h;h=19307fbc3ca401976ef6823e8fda893e4a263751;hb=63fa67847628e5f358e7e2e7edb8314f0ee31f30#l1859),
-- the [Java 10 standard library](https://docs.oracle.com/javase/10/docs/api/java/util/Random.html#doubles%28double,double%29)
-- and [CPython 3.8](https://github.com/python/cpython/blob/3.8/Lib/random.py#L417)
-- use the same procedure to generate floating point values in a range.
-- use a similar procedure to generate floating point values in a range.
--
-- $implemenstatefulegen
--
Expand Down

0 comments on commit 89b17ed

Please sign in to comment.