Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve SmallRng initialization performance #1482

Merged
merged 4 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.
- Add `UniformUsize` and use to make `Uniform` for `usize` portable (#1487)
- Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1487)
- Require `Clone` and `AsRef` bound for `SeedableRng::Seed`. (#1491)
- Improve SmallRng initialization performance

## [0.9.0-alpha.1] - 2024-03-18
- Add the `Slice::num_choices` method to the Slice distribution (#1402)
Expand Down
58 changes: 57 additions & 1 deletion benches/benches/generators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use rand_pcg::{Pcg32, Pcg64, Pcg64Dxsm, Pcg64Mcg};
criterion_group!(
name = benches;
config = Criterion::default();
targets = gen_bytes, gen_u32, gen_u64, init_gen, reseeding_bytes
targets = gen_bytes, gen_u32, gen_u64, init_gen, init_from_u64, init_from_seed, reseeding_bytes
);
criterion_main!(benches);

Expand Down Expand Up @@ -133,6 +133,62 @@ pub fn init_gen(c: &mut Criterion) {
bench::<ChaCha12Rng>(&mut g, "chacha12");
bench::<ChaCha20Rng>(&mut g, "chacha20");
bench::<StdRng>(&mut g, "std");
bench::<SmallRng>(&mut g, "small");

g.finish()
}

pub fn init_from_u64(c: &mut Criterion) {
let mut g = c.benchmark_group("init_from_u64");
g.warm_up_time(Duration::from_millis(500));
g.measurement_time(Duration::from_millis(1000));

fn bench<R: SeedableRng>(g: &mut BenchmarkGroup<WallTime>, name: &str) {
g.bench_function(name, |b| {
let mut rng = Pcg32::from_os_rng();
let seed = rng.random();
b.iter(|| R::seed_from_u64(black_box(seed)));
});
}

bench::<Pcg32>(&mut g, "pcg32");
bench::<Pcg64>(&mut g, "pcg64");
bench::<Pcg64Mcg>(&mut g, "pcg64mcg");
bench::<Pcg64Dxsm>(&mut g, "pcg64dxsm");
bench::<ChaCha8Rng>(&mut g, "chacha8");
bench::<ChaCha12Rng>(&mut g, "chacha12");
bench::<ChaCha20Rng>(&mut g, "chacha20");
bench::<StdRng>(&mut g, "std");
bench::<SmallRng>(&mut g, "small");

g.finish()
}

pub fn init_from_seed(c: &mut Criterion) {
let mut g = c.benchmark_group("init_from_seed");
g.warm_up_time(Duration::from_millis(500));
g.measurement_time(Duration::from_millis(1000));

fn bench<R: SeedableRng>(g: &mut BenchmarkGroup<WallTime>, name: &str)
where
rand::distr::Standard: Distribution<<R as SeedableRng>::Seed>,
{
g.bench_function(name, |b| {
let mut rng = Pcg32::from_os_rng();
let seed = rng.random();
b.iter(|| R::from_seed(black_box(seed.clone())));
});
}

bench::<Pcg32>(&mut g, "pcg32");
bench::<Pcg64>(&mut g, "pcg64");
bench::<Pcg64Mcg>(&mut g, "pcg64mcg");
bench::<Pcg64Dxsm>(&mut g, "pcg64dxsm");
bench::<ChaCha8Rng>(&mut g, "chacha8");
bench::<ChaCha12Rng>(&mut g, "chacha12");
bench::<ChaCha20Rng>(&mut g, "chacha20");
bench::<StdRng>(&mut g, "std");
bench::<SmallRng>(&mut g, "small");

g.finish()
}
Expand Down
3 changes: 2 additions & 1 deletion src/rngs/small.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ impl SeedableRng for SmallRng {

#[inline(always)]
fn from_seed(seed: Self::Seed) -> Self {
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap();
arthurprs marked this conversation as resolved.
Show resolved Hide resolved
// This is for compatibility with 32-bit platforms where Rng::Seed has a different seed size
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap()
const LEN: usize = core::mem::size_of::<<Rng as SeedableRng>::Seed>();
let seed = (&seed[..LEN]).try_into().unwrap();
SmallRng(Rng::from_seed(seed))
Expand Down
21 changes: 14 additions & 7 deletions src/rngs/xoshiro128plusplus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,36 @@ impl SeedableRng for Xoshiro128PlusPlus {
/// mapped to a different seed.
#[inline]
fn from_seed(seed: [u8; 16]) -> Xoshiro128PlusPlus {
if seed.iter().all(|&x| x == 0) {
return Self::seed_from_u64(0);
}
let mut state = [0; 4];
read_u32_into(&seed, &mut state);
// Check for zero on aligned integers for better code generation.
// Furtermore, seed_from_u64(0) will expand to a constant when optimized.
if state.iter().all(|&x| x == 0) {
dhardy marked this conversation as resolved.
Show resolved Hide resolved
return Self::seed_from_u64(0);
}
Xoshiro128PlusPlus { s: state }
}

/// Create a new `Xoshiro128PlusPlus` from a `u64` seed.
///
/// This uses the SplitMix64 generator internally.
#[inline]
fn seed_from_u64(mut state: u64) -> Self {
const PHI: u64 = 0x9e3779b97f4a7c15;
let mut seed = Self::Seed::default();
for chunk in seed.as_mut().chunks_mut(8) {
let mut s = [0; 4];
for i in s.chunks_exact_mut(2) {
state = state.wrapping_add(PHI);
let mut z = state;
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
z = z ^ (z >> 31);
chunk.copy_from_slice(&z.to_le_bytes());
i[0] = z.to_le() as u32;
i[1] = (z.to_le() >> 32) as u32;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We aren't round-tripping to LE bytes and back since your code now directly initializes state, hence we don't want .to_le() here.

This should trip a test, but it seems we never actually test seed_from_u64... because we don't guarantee value-stability for SmallRng anyway. (In other words, this isn't strictly a bug, but you should still remove .to_le(), and preferably also add some form of reference test for this method.)

Copy link
Contributor Author

@arthurprs arthurprs Oct 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I remove it, won't the result be different than before on BE? I was trying to preserve that here. But yeah, kinda pointless w/o a test to ensure that it's correct.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean that this PR should add test for the expected state of the Rng after seed_from_u64?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a reference test in the same file, but in this case testing a single sample should be enough. And yes, results should match from prior to your PR.

It doesn't really matter since we explicitly do not promise value stability of these RNGs, but feel like we should anyway.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in bee90d1 and 4d6b651

}
Self::from_seed(seed)
// By using a non-zero PHI we are guaranteed to generate a non-zero state
// Thus preventing a recursion between from_seed and seed_from_u64.
debug_assert_ne!(s, [0; 4]);
Xoshiro128PlusPlus { s }
}
}

Expand Down
20 changes: 13 additions & 7 deletions src/rngs/xoshiro256plusplus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,35 @@ impl SeedableRng for Xoshiro256PlusPlus {
/// mapped to a different seed.
#[inline]
fn from_seed(seed: [u8; 32]) -> Xoshiro256PlusPlus {
if seed.iter().all(|&x| x == 0) {
return Self::seed_from_u64(0);
}
let mut state = [0; 4];
read_u64_into(&seed, &mut state);
// Check for zero on aligned integers for better code generation.
// Furtermore, seed_from_u64(0) will expand to a constant when optimized.
if state.iter().all(|&x| x == 0) {
return Self::seed_from_u64(0);
}
Xoshiro256PlusPlus { s: state }
}

/// Create a new `Xoshiro256PlusPlus` from a `u64` seed.
///
/// This uses the SplitMix64 generator internally.
#[inline]
fn seed_from_u64(mut state: u64) -> Self {
const PHI: u64 = 0x9e3779b97f4a7c15;
let mut seed = Self::Seed::default();
for chunk in seed.as_mut().chunks_mut(8) {
let mut s = [0; 4];
for i in s.iter_mut() {
state = state.wrapping_add(PHI);
let mut z = state;
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
z = z ^ (z >> 31);
chunk.copy_from_slice(&z.to_le_bytes());
*i = z.to_le();
arthurprs marked this conversation as resolved.
Show resolved Hide resolved
}
Self::from_seed(seed)
// By using a non-zero PHI we are guaranteed to generate a non-zero state
// Thus preventing a recursion between from_seed and seed_from_u64.
debug_assert_ne!(s, [0; 4]);
Xoshiro256PlusPlus { s }
}
}

Expand Down