Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

If fallthrough #109

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Update rust
run: rustup update

- name: Switch to nightly rust
run: rustup default nightly

Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ jobs:
steps:
- uses: actions/checkout@v3

- name: Switch to nightly rust
run: rustup default nightly

- name: Rust version
run: cargo rustc -- --version

Expand Down Expand Up @@ -52,6 +55,9 @@ jobs:
steps:
- uses: actions/checkout@v3

- name: Switch to nightly rust
run: rustup default nightly

- name: Rust version
run: cargo rustc -- --version

Expand Down
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ highway = "1.2.0"
seahash = "4.1.0"
metrohash = "1.0.6"
fnv = "1.0.7"
foldhash = "0.1.3"

[dev-dependencies.plotters]
version = "0.3.7"
Expand All @@ -64,5 +65,9 @@ harness = false
name = "quality"
harness = false

[[bench]]
name = "read_beyond"
harness = false

[[example]]
name = "hello_world"
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,38 @@ The `throughput` benchmark is custom (it does not rely on criterion.rs). In an a
![x86_64](./benches/throughput/x86_64.svg)
![x86_64-hybrid](./benches/throughput/x86_64-hybrid.svg)

### Quality

This repository includes some of the SMHasher quality tests rewritten in Rust. This allows us to easily assess the quality of GxHash and other hash functions on different platforms.
```bash
cargo bench --bench quality
```

This will output the results like this:
```rust
Bench GxHash
✅ avalanche::<B,4>()
...
✅ avalanche::<B,512>()
✅ distribution_values::<B,4>(128*128)
...
✅ distribution_values::<B,512>(128*128)
✅ distribution_bits::<B,4>()
...
✅ collisions_padded_zeroes::<B>(128*128)
✅ collisions_flipped_bits::<B,2>(9)
...
✅ collisions_permute::<B,u8>(4,&Vec::from_iter(0..16))
...
✅ collisions_permute::<B,u128>(42,&Vec::from_iter(0..64))
✅ collisions_powerset_bytes::<B>(&[0,1,2,3,4,5,6,7,8,9])
...
✅ hasher_collisions_permute::<B,u8>(&[0,1,2,3,4,5,6,7,8,9])
...
❌ some_quality_criterion::<B,32>(3)
| Score: 0.0000143. Expected is 0.
```

## Contributing

- Feel free to submit PRs
Expand Down
5 changes: 5 additions & 0 deletions benches/quality/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@ use rand::Rng;
use criterion::black_box;

// Entry point: runs the SMHasher-style quality suite against each hasher
// and prints one ✅/❌ line per individual check.
fn main() {
    // Passing hash functions ✅
    bench_hasher_quality::<gxhash::GxBuildHasher>("GxHash");
    bench_hasher_quality::<std::collections::hash_map::RandomState>("Default");
    bench_hasher_quality::<twox_hash::xxh3::RandomHashBuilder64>("XxHash (XXH3)");
    bench_hasher_quality::<ahash::RandomState>("AHash");
    bench_hasher_quality::<t1ha::T1haBuildHasher>("T1ha");

    // Not passing hash functions ❌ — expected to fail at least one check
    bench_hasher_quality::<fnv::FnvBuildHasher>("FNV-1a");
    bench_hasher_quality::<foldhash::quality::RandomState>("FoldHash");
}

macro_rules! check {
Expand All @@ -31,6 +35,7 @@ fn bench_hasher_quality<B>(name: &str)

check!(avalanche::<B, 4>());
check!(avalanche::<B, 10>());
check!(avalanche::<B, 16>());
check!(avalanche::<B, 32>());
check!(avalanche::<B, 128>());
check!(avalanche::<B, 512>());
Expand Down
188 changes: 188 additions & 0 deletions benches/read_beyond.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#![feature(portable_simd)]
#![feature(core_intrinsics)]

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use std::simd::*;
use std::mem::transmute;

#[cfg(target_arch = "aarch64")]
mod arch {

    // Reference timings for the strategies below, kept for comparison.
    //
    // Macbook pro M1
    // get_partial_safe/copy (4)
    //     time: [7.5658 ns 7.6379 ns 7.7465 ns]
    // get_partial_safe/urbd (4)
    //     time: [1.2707 ns 1.2803 ns 1.2944 ns]
    // get_partial_safe/simd_masked_load (4)
    //     time: [2.9972 ns 3.0029 ns 3.0107 ns]
    // get_partial_safe/portable_simd (4)
    //     time: [3.8087 ns 3.8305 ns 3.8581 ns]

    // AMD Ryzen 5 5625U
    // get_partial_safe/copy (4)
    //     time: [9.0579 ns 9.0854 ns 9.1167 ns]
    // get_partial_safe/urbd (4)
    //     time: [4.6165 ns 4.6203 ns 4.6244 ns]
    // get_partial_safe/simd_masked_load (4)
    //     time: [3.2439 ns 3.2556 ns 3.2746 ns]
    // get_partial_safe/portable_simd (4)
    //     time: [3.3122 ns 3.3192 ns 3.3280 ns]

    use super::*;
    use core::arch::aarch64::*;

    /// SIMD state: one 128-bit NEON register of 16 signed bytes.
    pub type State = int8x16_t;

    /// Safe baseline: copy the `len` valid bytes into a zeroed stack buffer,
    /// then load all 16 bytes from the buffer. Never reads past `data + len`.
    #[inline(always)]
    pub unsafe fn copy(data: *const State, len: usize) -> State {
        // Temporary buffer filled with zeros
        let mut buffer = [0i8; 16];
        // Copy only the `len` valid bytes into the buffer
        core::ptr::copy(data as *const i8, buffer.as_mut_ptr(), len);
        // Load the buffer into an int8x16_t vector (128-bit NEON load)
        let partial_vector = vld1q_s8(buffer.as_ptr());
        // Mix `len` into every lane so different lengths hash differently
        vaddq_s8(partial_vector, vdupq_n_s8(len as i8))
    }

    /// "Unaligned read beyond data": load the full 16 bytes, then zero lanes
    /// at index >= len via a comparison mask. Reads up to 16 - len bytes
    /// past the end of the valid data.
    #[inline(always)]
    pub unsafe fn urbd(data: *const State, len: usize) -> State {
        // Stripped of page check for simplicity, might crash program
        let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr());
        // mask lane i = all-ones when i < len, zero otherwise
        let mask = vcgtq_s8(vdupq_n_s8(len as i8), indices);
        vandq_s8(vld1q_s8(data as *const i8), vreinterpretq_s8_u8(mask))
    }

    /// Same as `urbd`; the out-of-bounds load is intended to be moved into
    /// inline asm (see the "asm to do" note) but is still a plain load here.
    #[inline(always)]
    pub unsafe fn urbd_asm(data: *const State, len: usize) -> State {
        // Stripped of page check for simplicity, might crash program
        let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr());
        let mask = vcgtq_s8(vdupq_n_s8(len as i8), indices);
        let oob_vector = vld1q_s8(data as *const i8); // asm to do
        vandq_s8(oob_vector, vreinterpretq_s8_u8(mask))
    }

    /// Masked load via the nightly `simd_masked_load` intrinsic: lanes with
    /// index >= len are taken from the passthrough vector, not from memory.
    #[inline(always)]
    pub unsafe fn simd_masked_load(data: *const State, len: usize) -> State {
        let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr());
        let mask = vreinterpretq_s8_u8(vcgtq_s8(vdupq_n_s8(len as i8), indices));
        std::intrinsics::simd::simd_masked_load(mask, data as *const i8, vdupq_n_s8(len as i8))
    }

    /// Portable-SIMD variant: build a slice over the `len` valid bytes and
    /// let `Simd::load_or_default` zero-fill the remaining lanes.
    #[inline(always)]
    pub unsafe fn portable_simd(data: *const State, len: usize) -> State {
        let slice = std::slice::from_raw_parts(data as *const i8, len);
        let data: Simd<i8, 16> = Simd::<i8, 16>::load_or_default(&slice);
        transmute(data)
    }
}

#[cfg(target_arch = "x86_64")]
mod arch {
    use super::*;
    use core::arch::x86_64::*;

    /// SIMD state: one 128-bit SSE register of 16 signed bytes.
    pub type State = __m128i;

    /// Safe baseline: copy the `len` valid bytes into a zeroed stack buffer,
    /// then load all 16 bytes from the buffer. Never reads past `data + len`.
    #[inline(always)]
    pub unsafe fn copy(data: *const State, len: usize) -> State {
        // Temporary buffer filled with zeros
        let mut buffer = [0i8; 16];
        // Copy only the `len` valid bytes into the buffer
        core::ptr::copy(data as *const i8, buffer.as_mut_ptr(), len);
        // Load the buffer into a __m128i vector (128-bit load)
        let partial_vector = _mm_loadu_si128(buffer.as_ptr() as *const State);
        // Mix `len` into every lane so different lengths hash differently
        _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8))
    }

    /// "Unaligned read beyond data": load the full 16 bytes, then zero lanes
    /// at index >= len via a comparison mask. Reads up to 16 - len bytes
    /// past the end of the valid data.
    #[inline(always)]
    pub unsafe fn urbd(data: *const State, len: usize) -> State {
        // Stripped of page check for simplicity, might crash program
        let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        // mask lane i = all-ones when i < len, zero otherwise
        let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
        _mm_and_si128(_mm_loadu_si128(data), mask)
    }

    /// Same as `urbd`, but performs the out-of-bounds load through inline asm
    /// so the compiler cannot reason about or reorder the load itself.
    #[inline(always)]
    pub unsafe fn urbd_asm(data: *const State, len: usize) -> State {
        use std::arch::asm;
        // Stripped of page check for simplicity, might crash program
        let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
        let oob_vector: State;
        // FIX: Rust inline asm defaults to Intel syntax, where the destination
        // comes first. The original `movdqu [{}], {}` *stored* the
        // uninitialized xmm register to memory; we want the load
        // `movdqu xmm, [mem]`.
        // FIX: `nomem` was incorrect — the instruction reads memory;
        // `readonly` is the correct option and keeps `pure` sound.
        asm!("movdqu {}, [{}]", out(xmm_reg) oob_vector, in(reg) data, options(pure, readonly, nostack));
        _mm_and_si128(oob_vector, mask)
    }

    /// Masked load via the nightly `simd_masked_load` intrinsic: lanes with
    /// index >= len are taken from the passthrough vector, not from memory.
    #[inline(always)]
    pub unsafe fn simd_masked_load(data: *const State, len: usize) -> State {
        let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
        State::from(std::intrinsics::simd::simd_masked_load(core::simd::i8x16::from(mask), data as *const i8, core::simd::i8x16::from(_mm_set1_epi8(len as i8))))
    }

    /// Portable-SIMD variant: build a slice over the `len` valid bytes and
    /// let `Simd::load_or_default` zero-fill the remaining lanes.
    #[inline(always)]
    pub unsafe fn portable_simd(data: *const State, len: usize) -> State {
        let slice = std::slice::from_raw_parts(data as *const i8, len);
        let data: Simd<i8, 16> = Simd::<i8, 16>::load_or_default(&slice);
        transmute(data)
    }
}

/// Criterion harness: measures every partial-load strategy at several
/// valid-byte lengths (4/8/12/16 of a 16-byte register).
fn benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("get_partial_safe");

    // Zeroed 16-byte state used as the source for every strategy.
    let test_data: arch::State = unsafe { std::mem::zeroed() };

    // Benchmark with different lengths
    for &len in &[4, 8, 12, 16] {
        group.bench_function(format!("copy ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::copy(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });

        group.bench_function(format!("urbd ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::urbd(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });

        // FIX: this case previously called `arch::urbd`, so the asm variant
        // was never actually measured.
        group.bench_function(format!("urbd_asm ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::urbd_asm(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });

        group.bench_function(format!("simd_masked_load ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::simd_masked_load(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });

        group.bench_function(format!("portable_simd ({})", len), |b| {
            b.iter(|| unsafe {
                black_box(arch::portable_simd(
                    black_box(&test_data as *const arch::State),
                    black_box(len),
                ))
            })
        });
    }

    group.finish();
}
// Register the group and let criterion generate the `main` entry point.
criterion_group!(benches, benchmark);
criterion_main!(benches);
Loading