Improve accuracy of throughput benchmark and log scale chart output #105

Merged: 1 commit, Nov 11, 2024
2 changes: 1 addition & 1 deletion benches/hashset.rs
@@ -74,7 +74,7 @@ fn iterate<T, B>(b: &mut criterion::Bencher<'_>, value: &T, set: &mut HashSet<T,
     set.insert(T::default());
     b.iter(|| {
         // We intentionally check on a value that is not present, otherwise there will be an
-        // additional equality check perform, diluting the hashing time and biasing the benchmark.
+        // additional equality check performed, diluting the hashing time and biasing the benchmark.
         set.contains(criterion::black_box(value))
     });
 }
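The comment above is the point of this hunk: looking up a key that is present ends with an equality comparison against the stored element, so the measured time would mix hashing with `eq`. A quick way to see this, not part of this PR, is to count `eq` calls with a wrapper key type (`CountingKey` and `EQ_CALLS` below are made up for the illustration):

```rust
use std::collections::HashSet;
use std::hash::{Hash, Hasher};
use std::sync::atomic::{AtomicUsize, Ordering};

static EQ_CALLS: AtomicUsize = AtomicUsize::new(0);

// Hypothetical key that counts how often PartialEq::eq is invoked.
#[derive(Eq)]
struct CountingKey(u64);

impl PartialEq for CountingKey {
    fn eq(&self, other: &Self) -> bool {
        EQ_CALLS.fetch_add(1, Ordering::Relaxed);
        self.0 == other.0
    }
}

impl Hash for CountingKey {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.0.hash(state);
    }
}

fn main() {
    let mut set = HashSet::new();
    set.insert(CountingKey(1));

    let _hit = set.contains(&CountingKey(1)); // present: hash + at least one eq
    let after_hit = EQ_CALLS.load(Ordering::Relaxed);

    let _miss = set.contains(&CountingKey(2)); // absent: usually resolved by hashing alone
    let after_miss = EQ_CALLS.load(Ordering::Relaxed);

    // Expect roughly: after_hit >= 1 and after_miss == after_hit (barring hash collisions).
    println!("eq calls: hit={after_hit}, miss={after_miss}");
}
```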
51 changes: 23 additions & 28 deletions benches/throughput/main.rs
@@ -2,8 +2,8 @@ mod result_processor;

 use result_processor::*;

-use std::hash::Hasher;
 use std::hint::black_box;
+use std::hash::Hasher;
 use std::time::{Instant, Duration};
 use std::alloc::{alloc, dealloc, Layout};
 use std::slice;
@@ -12,9 +12,8 @@ use rand::Rng;

 use gxhash::*;

-const ITERATIONS: u32 = 1000;
+const ITERATIONS: usize = 1000;
 const MAX_RUN_DURATION: Duration = Duration::from_millis(1000);
-const FORCE_NO_INLINING: bool = false;

 fn main() {
     let mut rng = rand::thread_rng();
@@ -49,7 +48,7 @@ fn main() {
     });

     // AHash
-    let ahash_hasher = ahash::RandomState::with_seeds(0, 0, 0, 0);
+    let ahash_hasher = ahash::RandomState::with_seed(42);
     benchmark(processor.as_mut(), slice, "AHash", |data: &[u8], _: i32| -> u64 {
         ahash_hasher.hash_one(data)
     });
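For reference, the seeded constructor keeps the AHash benchmark reproducible within a run: two states built from the same seed agree on every input. A minimal check, assuming ahash 0.8's `RandomState::with_seed` and the standard `BuildHasher::hash_one` (not part of this PR):

```rust
use std::hash::BuildHasher;

fn main() {
    // Two states from the same seed produce identical hashes in this process,
    // unlike ahash::RandomState::new(), which draws fresh random keys.
    let a = ahash::RandomState::with_seed(42);
    let b = ahash::RandomState::with_seed(42);

    let data = b"some bytes to hash";
    assert_eq!(a.hash_one(&data[..]), b.hash_one(&data[..]));
    println!("deterministic hash: {:#x}", a.hash_one(&data[..]));
}
```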
@@ -91,7 +90,7 @@ fn main() {
 }

 fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str, delegate: F)
-    where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize>
+    where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy
 {
     processor.on_start(name);
     for i in 2.. {
@@ -101,22 +100,20 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
         }

         // Warmup
-        black_box(time(ITERATIONS, &|| delegate(&data[..len], S::default())));
+        time::<_, _, ITERATIONS>(&delegate, &data[..len], S::default());

         let mut durations_s = vec![];
         let now = Instant::now();
         while now.elapsed() < MAX_RUN_DURATION {
             // Make seed unpredictable to prevent optimizations
-            let seed = S::try_from(now.elapsed().as_nanos())
-                .unwrap_or_else(|_| panic!("Something went horribly wrong!"));
+            let seed = S::try_from(now.elapsed().as_nanos()).unwrap_or_else(|_| panic!());
             // Offset slice by an unpredictable amount to prevent optimization (pre caching)
             // and make the benchmark use both aligned and unaligned data
-            let start = S::try_into(seed)
-                .unwrap_or_else(|_| panic!("Something went horribly wrong!")) & 0xFF;
+            let start = S::try_into(seed).unwrap_or_else(|_| panic!()) & 0xFF;
             let end = start + len;
             let slice = &data[start..end];
             // Execute method for a new iterations
-            let duration = time(ITERATIONS, &|| delegate(slice, S::default()));
+            let duration = time::<_, _, ITERATIONS>(&delegate, slice, seed);
             durations_s.push(duration.as_secs_f64());
         }
         let average_duration_s = calculate_average_without_outliers(&mut durations_s);
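The two comments in this hunk carry the main idea of the rework: both the seed and the slice offset are derived from the clock, so the compiler cannot constant-fold them, and the hashed data alternates between aligned and unaligned start addresses. Here is a standalone sketch of the same pattern; `toy_hash` and `run` are made-up stand-ins for the benchmarked hashers and the timed loop, not code from this repository:

```rust
use std::hint::black_box;
use std::time::Instant;

// Made-up stand-in for gxhash/xxhash/etc.; only the call pattern matters here.
fn toy_hash(data: &[u8], seed: u64) -> u64 {
    data.iter().fold(seed, |acc, &b| (acc ^ b as u64).wrapping_mul(0x100000001b3))
}

// Hot loop kept out-of-line, mirroring the #[inline(never)] on `iter` later in
// this diff, so every candidate hasher pays the same call overhead.
#[inline(never)]
fn run(data: &[u8], seed: u64, iterations: usize) -> u64 {
    let mut acc = 0;
    for _ in 0..iterations {
        // black_box the input slice and the result so the loop is not folded away.
        acc ^= black_box(toy_hash(black_box(data), seed));
    }
    acc
}

fn main() {
    let len = 1024;
    let data = vec![7u8; len + 0xFF]; // headroom so the offset slice stays in bounds
    let clock = Instant::now();

    // Clock-derived seed and offset: not compile-time constants, and the slice
    // start varies over 0..=255, covering aligned and unaligned addresses.
    let seed = clock.elapsed().as_nanos() as u64;
    let start = (seed as usize) & 0xFF;
    let slice = &data[start..start + len];

    let checksum = run(slice, seed, 1000);
    println!("{checksum:x}");
}
```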
@@ -127,30 +124,28 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
     processor.on_end();
 }

-#[inline(never)]
-fn time<F>(iterations: u32, delegate: &F) -> Duration
-    where F: Fn() -> u64
+fn time<F, S, const N: usize>(delegate: F, slice: &[u8], seed: S) -> Duration
+    where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy
 {
-    let now = Instant::now();
-    // Bench the same way to what is done in criterion.rs
+    // Time measurement similar to what is done in criterion.rs
     // https://github.com/bheisler/criterion.rs/blob/e1a8c9ab2104fbf2d15f700d0038b2675054a2c8/src/bencher.rs#L87
-    for _ in 0..iterations {
-        if FORCE_NO_INLINING {
-            black_box(execute_noinlining(delegate));
-        } else {
-            black_box(delegate());
-        }
-    }
+    let now = Instant::now();
+    iter::<F, S, N>(delegate, slice, seed);
     now.elapsed()
 }

-// Some algorithm are more likely to be inlined than others.
-// This puts then all at the same level. But is it fair?
+// The content might be inlined, but the function itself should not be inlined
+// This favors benchmarked methods with small byte code size, which is more realistic
 #[inline(never)]
-fn execute_noinlining<F>(delegate: &F) -> u64
-    where F: Fn() -> u64
+fn iter<F, S, const N: usize>(delegate: F, slice: &[u8], seed: S)
+    where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy
 {
-    delegate()
+    for _ in 0..N {
+        // Black box the result to prevent the compiler from optimizing the operation away
+        // Black box the slice to prevent the compiler to assume the slice is constant
+        // We don't black box the seed because it's likely to be constant in most real-world usage scenarios
+        black_box(delegate(black_box(slice), seed));
+    }
 }

 // Outliers are inevitable, especially on a low number of iterations
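The averaged samples feed `calculate_average_without_outliers`, whose body is not part of this diff. As a rough illustration of what such a helper can look like, here is a hypothetical IQR-based trim (`trimmed_average` plays that role here; it is an assumption, not necessarily the repository's implementation):

```rust
// Hypothetical outlier-trimmed mean: drop samples outside 1.5 * IQR of the
// quartile band, then average what remains. Assumes a non-empty input.
fn trimmed_average(durations_s: &mut Vec<f64>) -> f64 {
    durations_s.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let quantile = |p: f64| durations_s[((durations_s.len() - 1) as f64 * p) as usize];
    let (q1, q3) = (quantile(0.25), quantile(0.75));
    let iqr = q3 - q1;
    let kept: Vec<f64> = durations_s
        .iter()
        .copied()
        .filter(|d| *d >= q1 - 1.5 * iqr && *d <= q3 + 1.5 * iqr)
        .collect();
    kept.iter().sum::<f64>() / kept.len() as f64
}

fn main() {
    let mut durations_s = vec![1.00, 1.02, 0.99, 1.01, 5.00]; // one obvious outlier
    println!("{:.3}", trimmed_average(&mut durations_s)); // prints 1.005
}
```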
6 changes: 3 additions & 3 deletions benches/throughput/result_processor.rs
@@ -132,7 +132,7 @@ impl ResultProcessor for OutputPlot {
         let x_min = self.series.iter().next().unwrap().1.iter().map(|(x, _)| *x as u32).min().unwrap();
         let x_max = self.series.iter().next().unwrap().1.iter().map(|(x, _)| *x as u32).max().unwrap();

-        let y_min = 0u32;
+        let y_min = self.series.iter().flat_map(|inner_map| inner_map.1.iter()).map(|(_, y)| (0.95 * *y) as u32).min().unwrap();
         let y_max = self.series.iter().flat_map(|inner_map| inner_map.1.iter()).map(|(_, y)| (1.05 * *y) as u32).max().unwrap();

         let mut chart = ChartBuilder::on(&canvas)
@@ -144,8 +144,8 @@
                 (x_min..x_max)
                     .log_scale()
                     .with_key_points(self.series.iter().next().unwrap().1.iter().map(|(x, _)| *x as u32).collect::<Vec<u32>>()),
-                y_min..y_max
-                //.log_scale(),
+                (y_min..y_max)
+                    .log_scale(),
             ).unwrap();

         chart
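The switch from a linear to a log-scale y-axis is the chart-side half of the PR title: input sizes span orders of magnitude, and so can the throughput gap between hashers, which a linear axis compresses. A self-contained sketch of a log-log plot, assuming plotters 0.3; the file name, canvas size, axis bounds, and the synthetic series are placeholders, not taken from the repository:

```rust
use plotters::prelude::*;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let canvas = BitMapBackend::new("throughput_example.png", (800, 600)).into_drawing_area();
    canvas.fill(&WHITE)?;

    let mut chart = ChartBuilder::on(&canvas)
        .margin(20)
        .x_label_area_size(40)
        .y_label_area_size(60)
        .caption("Throughput (example data)", ("sans-serif", 20))
        // Both axes on a log scale, as in the chart built above.
        .build_cartesian_2d((4u32..131072u32).log_scale(), (500u32..50_000u32).log_scale())?;

    chart
        .configure_mesh()
        .x_desc("Input size (bytes)")
        .y_desc("Throughput (MiB/s)")
        .draw()?;

    // Synthetic series standing in for one hasher's measurements.
    chart.draw_series(LineSeries::new(
        (2..=16).map(|i| (1u32 << i, 1_000 + 600 * i as u32)),
        &RED,
    ))?;

    canvas.present()?;
    Ok(())
}
```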