From d1f961c4be38f4147ac1adb3c2bfb2745a9fde6b Mon Sep 17 00:00:00 2001 From: Arthur Silva Date: Mon, 7 Oct 2024 13:12:40 +0200 Subject: [PATCH] Improve SmallRng initialization performance (#1482) --- CHANGELOG.md | 1 + benches/benches/generators.rs | 58 +++++++++++++++++++++++++++++++++- src/rngs/small.rs | 3 +- src/rngs/xoshiro128plusplus.rs | 35 ++++++++++++++++---- src/rngs/xoshiro256plusplus.rs | 42 ++++++++++++++++++++---- 5 files changed, 123 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3300b9ad9f..a09c14c563 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update. - Add `UniformUsize` and use to make `Uniform` for `usize` portable (#1487) - Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1487) - Require `Clone` and `AsRef` bound for `SeedableRng::Seed`. (#1491) +- Improve SmallRng initialization performance (#1482) - Implement `Distribution` for `Poisson` (#1498) - Limit the maximal acceptable lambda for `Poisson` to solve (#1312) (#1498) - Rename `Rng::gen_iter` to `random_iter` (#1500) diff --git a/benches/benches/generators.rs b/benches/benches/generators.rs index 580d989755..bea4f60a6b 100644 --- a/benches/benches/generators.rs +++ b/benches/benches/generators.rs @@ -19,7 +19,7 @@ use rand_pcg::{Pcg32, Pcg64, Pcg64Dxsm, Pcg64Mcg}; criterion_group!( name = benches; config = Criterion::default(); - targets = gen_bytes, gen_u32, gen_u64, init_gen, reseeding_bytes + targets = gen_bytes, gen_u32, gen_u64, init_gen, init_from_u64, init_from_seed, reseeding_bytes ); criterion_main!(benches); @@ -133,6 +133,62 @@ pub fn init_gen(c: &mut Criterion) { bench::(&mut g, "chacha12"); bench::(&mut g, "chacha20"); bench::(&mut g, "std"); + bench::(&mut g, "small"); + + g.finish() +} + +pub fn init_from_u64(c: &mut Criterion) { + let mut g = 
c.benchmark_group("init_from_u64"); + g.warm_up_time(Duration::from_millis(500)); + g.measurement_time(Duration::from_millis(1000)); + + fn bench(g: &mut BenchmarkGroup, name: &str) { + g.bench_function(name, |b| { + let mut rng = Pcg32::from_os_rng(); + let seed = rng.random(); + b.iter(|| R::seed_from_u64(black_box(seed))); + }); + } + + bench::(&mut g, "pcg32"); + bench::(&mut g, "pcg64"); + bench::(&mut g, "pcg64mcg"); + bench::(&mut g, "pcg64dxsm"); + bench::(&mut g, "chacha8"); + bench::(&mut g, "chacha12"); + bench::(&mut g, "chacha20"); + bench::(&mut g, "std"); + bench::(&mut g, "small"); + + g.finish() +} + +pub fn init_from_seed(c: &mut Criterion) { + let mut g = c.benchmark_group("init_from_seed"); + g.warm_up_time(Duration::from_millis(500)); + g.measurement_time(Duration::from_millis(1000)); + + fn bench(g: &mut BenchmarkGroup, name: &str) + where + rand::distr::Standard: Distribution<::Seed>, + { + g.bench_function(name, |b| { + let mut rng = Pcg32::from_os_rng(); + let seed = rng.random(); + b.iter(|| R::from_seed(black_box(seed.clone()))); + }); + } + + bench::(&mut g, "pcg32"); + bench::(&mut g, "pcg64"); + bench::(&mut g, "pcg64mcg"); + bench::(&mut g, "pcg64dxsm"); + bench::(&mut g, "chacha8"); + bench::(&mut g, "chacha12"); + bench::(&mut g, "chacha20"); + bench::(&mut g, "std"); + bench::(&mut g, "small"); g.finish() } diff --git a/src/rngs/small.rs b/src/rngs/small.rs index ea7df06284..cfc6b0c988 100644 --- a/src/rngs/small.rs +++ b/src/rngs/small.rs @@ -83,7 +83,8 @@ impl SeedableRng for SmallRng { #[inline(always)] fn from_seed(seed: Self::Seed) -> Self { - // With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap(); + // This is for compatibility with 32-bit platforms where Rng::Seed has a different seed size + // With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap() const LEN: usize = core::mem::size_of::<::Seed>(); let seed = (&seed[..LEN]).try_into().unwrap(); SmallRng(Rng::from_seed(seed)) diff --git 
a/src/rngs/xoshiro128plusplus.rs b/src/rngs/xoshiro128plusplus.rs index 44e4222c88..6bcc33ba5d 100644 --- a/src/rngs/xoshiro128plusplus.rs +++ b/src/rngs/xoshiro128plusplus.rs @@ -33,29 +33,36 @@ impl SeedableRng for Xoshiro128PlusPlus { /// mapped to a different seed. #[inline] fn from_seed(seed: [u8; 16]) -> Xoshiro128PlusPlus { - if seed.iter().all(|&x| x == 0) { - return Self::seed_from_u64(0); - } let mut state = [0; 4]; read_u32_into(&seed, &mut state); + // Check for zero on aligned integers for better code generation. + // Furthermore, seed_from_u64(0) will expand to a constant when optimized. + if state.iter().all(|&x| x == 0) { + return Self::seed_from_u64(0); + } Xoshiro128PlusPlus { s: state } } /// Create a new `Xoshiro128PlusPlus` from a `u64` seed. /// /// This uses the SplitMix64 generator internally. + #[inline] fn seed_from_u64(mut state: u64) -> Self { const PHI: u64 = 0x9e3779b97f4a7c15; - let mut seed = Self::Seed::default(); - for chunk in seed.as_mut().chunks_mut(8) { + let mut s = [0; 4]; + for i in s.chunks_exact_mut(2) { state = state.wrapping_add(PHI); let mut z = state; z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9); z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb); z = z ^ (z >> 31); - chunk.copy_from_slice(&z.to_le_bytes()); + i[0] = z as u32; + i[1] = (z >> 32) as u32; } - Self::from_seed(seed) + // By using a non-zero PHI we are guaranteed to generate a non-zero state + // Thus preventing a recursion between from_seed and seed_from_u64. + debug_assert_ne!(s, [0; 4]); + Xoshiro128PlusPlus { s } } } @@ -113,4 +120,18 @@ mod tests { assert_eq!(rng.next_u32(), e); } } + + #[test] + fn stable_seed_from_u64() { + // We don't guarantee value-stability for SmallRng but this + // could influence keeping stability whenever possible (e.g. after optimizations). 
+ let mut rng = Xoshiro128PlusPlus::seed_from_u64(0); + let expected = [ + 1179900579, 1938959192, 3089844957, 3657088315, 1015453891, 479942911, 3433842246, + 669252886, 3985671746, 2737205563, + ]; + for &e in &expected { + assert_eq!(rng.next_u32(), e); + } + } } diff --git a/src/rngs/xoshiro256plusplus.rs b/src/rngs/xoshiro256plusplus.rs index b356ff510c..b1c022e0f1 100644 --- a/src/rngs/xoshiro256plusplus.rs +++ b/src/rngs/xoshiro256plusplus.rs @@ -33,29 +33,35 @@ impl SeedableRng for Xoshiro256PlusPlus { /// mapped to a different seed. #[inline] fn from_seed(seed: [u8; 32]) -> Xoshiro256PlusPlus { - if seed.iter().all(|&x| x == 0) { - return Self::seed_from_u64(0); - } let mut state = [0; 4]; read_u64_into(&seed, &mut state); + // Check for zero on aligned integers for better code generation. + // Furthermore, seed_from_u64(0) will expand to a constant when optimized. + if state.iter().all(|&x| x == 0) { + return Self::seed_from_u64(0); + } Xoshiro256PlusPlus { s: state } } /// Create a new `Xoshiro256PlusPlus` from a `u64` seed. /// /// This uses the SplitMix64 generator internally. + #[inline] fn seed_from_u64(mut state: u64) -> Self { const PHI: u64 = 0x9e3779b97f4a7c15; - let mut seed = Self::Seed::default(); - for chunk in seed.as_mut().chunks_mut(8) { + let mut s = [0; 4]; + for i in s.iter_mut() { state = state.wrapping_add(PHI); let mut z = state; z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9); z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb); z = z ^ (z >> 31); - chunk.copy_from_slice(&z.to_le_bytes()); + *i = z; } - Self::from_seed(seed) + // By using a non-zero PHI we are guaranteed to generate a non-zero state + // Thus preventing a recursion between from_seed and seed_from_u64. 
+ debug_assert_ne!(s, [0; 4]); + Xoshiro256PlusPlus { s } } } @@ -126,4 +132,26 @@ mod tests { assert_eq!(rng.next_u64(), e); } } + + #[test] + fn stable_seed_from_u64() { + // We don't guarantee value-stability for SmallRng but this + // could influence keeping stability whenever possible (e.g. after optimizations). + let mut rng = Xoshiro256PlusPlus::seed_from_u64(0); + let expected = [ + 5987356902031041503, + 7051070477665621255, + 6633766593972829180, + 211316841551650330, + 9136120204379184874, + 379361710973160858, + 15813423377499357806, + 15596884590815070553, + 5439680534584881407, + 1369371744833522710, + ]; + for &e in &expected { + assert_eq!(rng.next_u64(), e); + } + } }