Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

If fallthrough #109

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Update rust
run: rustup update

- name: Switch to nightly rust
run: rustup default nightly

Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ jobs:
steps:
- uses: actions/checkout@v3

- name: Switch to nightly rust
run: rustup default nightly

- name: Rust version
run: cargo rustc -- --version

Expand Down Expand Up @@ -52,6 +55,9 @@ jobs:
steps:
- uses: actions/checkout@v3

- name: Switch to nightly rust
run: rustup default nightly

- name: Rust version
run: cargo rustc -- --version

Expand Down
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ highway = "1.2.0"
seahash = "4.1.0"
metrohash = "1.0.6"
fnv = "1.0.7"
foldhash = "0.1.3"

[dev-dependencies.plotters]
version = "0.3.7"
Expand All @@ -64,5 +65,9 @@ harness = false
name = "quality"
harness = false

[[bench]]
name = "read_beyond"
harness = false

[[example]]
name = "hello_world"
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,38 @@ The `throughput` benchmark is custom (it does not rely on criterion.rs). In an a
![x86_64](./benches/throughput/x86_64.svg)
![x86_64-hybrid](./benches/throughput/x86_64-hybrid.svg)

### Quality

This repository includes some of the SMHasher quality tests rewritten in Rust. This allows us to easily assess the quality of GxHash and other hash functions on different platforms.
```bash
cargo bench --bench quality
```

This will output the results like this:
```rust
Bench GxHash
✅ avalanche::<B,4>()
...
✅ avalanche::<B,512>()
✅ distribution_values::<B,4>(128*128)
...
✅ distribution_values::<B,512>(128*128)
✅ distribution_bits::<B,4>()
...
✅ collisions_padded_zeroes::<B>(128*128)
✅ collisions_flipped_bits::<B,2>(9)
...
✅ collisions_permute::<B,u8>(4,&Vec::from_iter(0..16))
...
✅ collisions_permute::<B,u128>(42,&Vec::from_iter(0..64))
✅ collisions_powerset_bytes::<B>(&[0,1,2,3,4,5,6,7,8,9])
...
✅ hasher_collisions_permute::<B,u8>(&[0,1,2,3,4,5,6,7,8,9])
...
❌ some_quality_criterion::<B,32>(3)
| Score: 0.0000143. Expected is 0.
```

## Contributing

- Feel free to submit PRs
Expand Down
5 changes: 5 additions & 0 deletions benches/quality/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@ use rand::Rng;
use criterion::black_box;

// Entry point: runs the SMHasher-style quality suite against each hasher
// and prints one ✅/❌ line per individual check.
fn main() {
    // Passing hash functions ✅
    bench_hasher_quality::<gxhash::GxBuildHasher>("GxHash");
    bench_hasher_quality::<std::collections::hash_map::RandomState>("Default");
    bench_hasher_quality::<twox_hash::xxh3::RandomHashBuilder64>("XxHash (XXH3)");
    bench_hasher_quality::<ahash::RandomState>("AHash");
    bench_hasher_quality::<t1ha::T1haBuildHasher>("T1ha");

    // Not passing hash functions ❌ — expected to fail at least one check
    bench_hasher_quality::<fnv::FnvBuildHasher>("FNV-1a");
    bench_hasher_quality::<foldhash::quality::RandomState>("FoldHash");
}

macro_rules! check {
Expand All @@ -31,6 +35,7 @@ fn bench_hasher_quality<B>(name: &str)

check!(avalanche::<B, 4>());
check!(avalanche::<B, 10>());
check!(avalanche::<B, 16>());
check!(avalanche::<B, 32>());
check!(avalanche::<B, 128>());
check!(avalanche::<B, 512>());
Expand Down
188 changes: 188 additions & 0 deletions benches/read_beyond.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#![feature(portable_simd)]
#![feature(core_intrinsics)]

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::simd::*;
use std::mem::transmute;

#[cfg(target_arch = "aarch64")]
mod arch {

    // Reference timings for the strategies below, kept for comparison.
    //
    // Macbook pro M1
    // get_partial_safe/copy (4)
    //     time: [7.5658 ns 7.6379 ns 7.7465 ns]
    // get_partial_safe/urbd (4)
    //     time: [1.2707 ns 1.2803 ns 1.2944 ns]
    // get_partial_safe/simd_masked_load (4)
    //     time: [2.9972 ns 3.0029 ns 3.0107 ns]
    // get_partial_safe/portable_simd (4)
    //     time: [3.8087 ns 3.8305 ns 3.8581 ns]

    // AMD Ryzen 5 5625U
    // get_partial_safe/copy (4)
    //     time: [9.0579 ns 9.0854 ns 9.1167 ns]
    // get_partial_safe/urbd (4)
    //     time: [4.6165 ns 4.6203 ns 4.6244 ns]
    // get_partial_safe/simd_masked_load (4)
    //     time: [3.2439 ns 3.2556 ns 3.2746 ns]
    // get_partial_safe/portable_simd (4)
    //     time: [3.3122 ns 3.3192 ns 3.3280 ns]

    use super::*;
    use core::arch::aarch64::*;

    /// SIMD state: one 128-bit NEON register of 16 signed bytes.
    pub type State = int8x16_t;

    /// Safe baseline: copy the `len` valid bytes into a zeroed stack buffer,
    /// then load all 16 bytes from the buffer. Never reads past `data + len`.
    #[inline(always)]
    pub unsafe fn copy(data: *const State, len: usize) -> State {
        // Temporary buffer filled with zeros
        let mut buffer = [0i8; 16];
        // Copy only the `len` valid bytes into the buffer
        core::ptr::copy(data as *const i8, buffer.as_mut_ptr(), len);
        // Load the buffer into an int8x16_t vector (128-bit NEON load)
        let partial_vector = vld1q_s8(buffer.as_ptr());
        // Mix `len` into every lane so different lengths hash differently
        vaddq_s8(partial_vector, vdupq_n_s8(len as i8))
    }

    /// "Unaligned read beyond data": load the full 16 bytes, then zero lanes
    /// at index >= len via a comparison mask. Reads up to 16 - len bytes
    /// past the end of the valid data.
    #[inline(always)]
    pub unsafe fn urbd(data: *const State, len: usize) -> State {
        // Stripped of page check for simplicity, might crash program
        let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr());
        // mask lane i = all-ones when i < len, zero otherwise
        let mask = vcgtq_s8(vdupq_n_s8(len as i8), indices);
        vandq_s8(vld1q_s8(data as *const i8), vreinterpretq_s8_u8(mask))
    }

    /// Same as `urbd`; the out-of-bounds load is intended to be moved into
    /// inline asm (see the "asm to do" note) but is still a plain load here.
    #[inline(always)]
    pub unsafe fn urbd_asm(data: *const State, len: usize) -> State {
        // Stripped of page check for simplicity, might crash program
        let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr());
        let mask = vcgtq_s8(vdupq_n_s8(len as i8), indices);
        let oob_vector = vld1q_s8(data as *const i8); // asm to do
        vandq_s8(oob_vector, vreinterpretq_s8_u8(mask))
    }

    /// Masked load via the nightly `simd_masked_load` intrinsic: lanes with
    /// index >= len are taken from the passthrough vector, not from memory.
    #[inline(always)]
    pub unsafe fn simd_masked_load(data: *const State, len: usize) -> State {
        let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr());
        let mask = vreinterpretq_s8_u8(vcgtq_s8(vdupq_n_s8(len as i8), indices));
        std::intrinsics::simd::simd_masked_load(mask, data as *const i8, vdupq_n_s8(len as i8))
    }

    /// Portable-SIMD variant: build a slice over the `len` valid bytes and
    /// let `Simd::load_or_default` zero-fill the remaining lanes.
    #[inline(always)]
    pub unsafe fn portable_simd(data: *const State, len: usize) -> State {
        let slice = std::slice::from_raw_parts(data as *const i8, len);
        let data: Simd<i8, 16> = Simd::<i8, 16>::load_or_default(&slice);
        transmute(data)
    }
}

#[cfg(target_arch = "x86_64")]
mod arch {
    use super::*;
    use core::arch::x86_64::*;

    /// SIMD state: one 128-bit SSE register of 16 signed bytes.
    pub type State = __m128i;

    /// Safe baseline: copy the `len` valid bytes into a zeroed stack buffer,
    /// then load all 16 bytes from the buffer. Never reads past `data + len`.
    #[inline(always)]
    pub unsafe fn copy(data: *const State, len: usize) -> State {
        // Temporary buffer filled with zeros
        let mut buffer = [0i8; 16];
        // Copy only the `len` valid bytes into the buffer
        core::ptr::copy(data as *const i8, buffer.as_mut_ptr(), len);
        // Load the buffer into a __m128i vector (128-bit load)
        let partial_vector = _mm_loadu_si128(buffer.as_ptr() as *const State);
        // Mix `len` into every lane so different lengths hash differently
        _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8))
    }

    /// "Unaligned read beyond data": load the full 16 bytes, then zero lanes
    /// at index >= len via a comparison mask. Reads up to 16 - len bytes
    /// past the end of the valid data.
    #[inline(always)]
    pub unsafe fn urbd(data: *const State, len: usize) -> State {
        // Stripped of page check for simplicity, might crash program
        let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // mask lane i = all-ones when i < len, zero otherwise
        let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
        _mm_and_si128(_mm_loadu_si128(data), mask)
    }

    /// Same as `urbd`, but performs the out-of-bounds load through inline asm
    /// so the compiler cannot reason about or reorder the load itself.
    #[inline(always)]
    pub unsafe fn urbd_asm(data: *const State, len: usize) -> State {
        use std::arch::asm;
        // Stripped of page check for simplicity, might crash program
        let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
        let oob_vector: State;
        // FIX: Rust inline asm defaults to Intel syntax, where the destination
        // comes first. The original `movdqu [{}], {}` *stored* the
        // uninitialized xmm register to memory; we want the load
        // `movdqu xmm, [mem]`.
        // FIX: `nomem` was incorrect — the instruction reads memory;
        // `readonly` is the correct option and keeps `pure` sound.
        asm!("movdqu {}, [{}]", out(xmm_reg) oob_vector, in(reg) data, options(pure, readonly, nostack));
        _mm_and_si128(oob_vector, mask)
    }

    /// Masked load via the nightly `simd_masked_load` intrinsic: lanes with
    /// index >= len are taken from the passthrough vector, not from memory.
    #[inline(always)]
    pub unsafe fn simd_masked_load(data: *const State, len: usize) -> State {
        let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
        State::from(std::intrinsics::simd::simd_masked_load(core::simd::i8x16::from(mask), data as *const i8, core::simd::i8x16::from(_mm_set1_epi8(len as i8))))
    }

    /// Portable-SIMD variant: build a slice over the `len` valid bytes and
    /// let `Simd::load_or_default` zero-fill the remaining lanes.
    #[inline(always)]
    pub unsafe fn portable_simd(data: *const State, len: usize) -> State {
        let slice = std::slice::from_raw_parts(data as *const i8, len);
        let data: Simd<i8, 16> = Simd::<i8, 16>::load_or_default(&slice);
        transmute(data)
    }
}

/// Criterion harness: measures every partial-load strategy at several
/// valid-byte lengths (4/8/12/16 of a 16-byte register).
fn benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("get_partial_safe");

    // Zeroed 16-byte state used as the source for every strategy.
    let test_data: arch::State = unsafe { std::mem::zeroed() };

    // Benchmark with different lengths
    for &len in &[4, 8, 12, 16] {
        group.bench_function(format!("copy ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::copy(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });

        group.bench_function(format!("urbd ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::urbd(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });

        // FIX: this case previously called `arch::urbd`, so the asm variant
        // was never actually measured.
        group.bench_function(format!("urbd_asm ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::urbd_asm(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });

        group.bench_function(format!("simd_masked_load ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::simd_masked_load(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });

        group.bench_function(format!("portable_simd ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::portable_simd(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });
    }

    group.finish();
}
// Register the group and let criterion generate the `main` entry point.
criterion_group!(benches, benchmark);
criterion_main!(benches);
Loading