diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 2aa4d49..d7039fe 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -1,6 +1,8 @@ name: Benchmark on: + pull_request: + branches: [ "main" ] workflow_dispatch: env: @@ -17,6 +19,9 @@ jobs: - name: Update rust run: rustup update + - name: Switch to nightly rust + run: rustup default nightly + - name: Benchmark run: cargo bench --bench throughput --features bench-plot @@ -32,6 +37,9 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Update rust + run: rustup update + - name: Switch to nightly rust run: rustup default nightly @@ -53,6 +61,9 @@ jobs: - name: Update rust run: rustup update + - name: Switch to nightly rust + run: rustup default nightly + - name: Benchmark run: cargo bench --bench throughput --features bench-plot
diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index da4a907..768d37c 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -17,6 +17,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Switch to nightly rust + run: rustup default nightly + - name: Rust version run: cargo rustc -- --version @@ -52,6 +55,9 @@ jobs: steps: - uses: actions/checkout@v3 + - name: Switch to nightly rust + run: rustup default nightly + - name: Rust version run: cargo rustc -- --version
diff --git a/Cargo.toml b/Cargo.toml index e410fa6..d503dc4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ itertools = "0.12.0" # Benchmarks criterion = { version = "0.5.1" } # Other hash algorithms, for comparison. -ahash = "0.8.6" +ahash = "0.8.11" t1ha = "0.1.0" twox-hash = "1.6.3" highway = "1.1.0" @@ -62,4 +62,8 @@ harness = false [[bench]] name = "quality" +harness = false + +[[bench]] +name = "read_beyond" harness = false \ No newline at end of file
diff --git a/README.md b/README.md index 12379a4..4645698 100644 --- a/README.md +++ b/README.md @@ -109,11 +109,15 @@ cargo bench --bench throughput cargo bench --bench hashset ``` +Note: The `throughput` benchmark does not rely on criterion for timing measurements. In an attempt to reduce bias in this microbenchmark as much as possible, it shuffles seeds, input data, and alignment. It also has the benefit of being less of a "black box" compared to criterion. There is, however, a criterion-based throughput benchmark named `throughput_criterion` if you prefer. Results vary slightly between the two benchmarks; don't hesitate to submit an issue if you suspect bias and want to suggest improvements. + +Most importantly: if performance is a critical feature for your application, don't forget to benchmark the cost of hashing in your own context. Numbers shared here may be radically different in your environment and with your hardware. + ### Throughput Throughput is measured as the number of bytes hashed per second. -*Some prefer talking **latency** (time for generating a hash) or **hashrate** (the number of hashes generated per second) for measuring hash function performance, but those are all equivalent in the end as they all boil down to measuring the time it takes to hash some input and then apply different scalar transformation. For instance, if latency for a `4 bytes` hash is `1 ms`, then the throughput is `1 / 0.001 * 4 = 4000 bytes per second`. Throughput allows us to conveniently compare the performance of a hash function for any input size on a single graph.* +*Some prefer talking of **latency** (time for generating a hash) or **hashrate** (the number of hashes generated per second) for measuring hash function performance, but those are all equivalent in the end, as they all boil down to measuring the time it takes to hash some input and then applying a different scalar transformation. For instance, if the latency for a `4 bytes` hash is `1 ms`, then the throughput is `1 / 0.001 * 4 = 4000 bytes per second`. Throughput allows us to conveniently compare the performance of a hash function for any input size on a single graph.* **Latest Benchmark Results:** ![aarch64](./benches/throughput/aarch64.svg)
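The README paragraph above describes the latency-to-throughput conversion in prose; a minimal Rust sketch of that arithmetic follows (the function name is illustrative only and not part of the crate or of this patch):

```rust
/// Convert a measured hashing latency into a throughput figure.
/// Example from the paragraph above: 4 bytes hashed in 1 ms -> 4000 bytes per second.
fn throughput_bytes_per_sec(input_len_bytes: usize, latency_secs: f64) -> f64 {
    input_len_bytes as f64 / latency_secs
}

fn main() {
    assert_eq!(throughput_bytes_per_sec(4, 0.001), 4000.0);
}
```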
diff --git a/benches/read_beyond.rs b/benches/read_beyond.rs new file mode 100644 index 0000000..b8a2a58 --- /dev/null +++ b/benches/read_beyond.rs @@ -0,0 +1,188 @@ +#![feature(portable_simd)] +#![feature(core_intrinsics)] + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use std::simd::*; +use std::mem::transmute; + +#[cfg(target_arch = "aarch64")] +mod arch { + + // MacBook Pro M1 + // get_partial_safe/copy (4) + // time: [7.5658 ns 7.6379 ns 7.7465 ns] + // get_partial_safe/urbd (4) + // time: [1.2707 ns 1.2803 ns 1.2944 ns] + // get_partial_safe/simd_masked_load (4) + // time: [2.9972 ns 3.0029 ns 3.0107 ns] + // get_partial_safe/portable_simd (4) + // time: [3.8087 ns 3.8305 ns 3.8581 ns] + + // AMD Ryzen 5 5625U + // get_partial_safe/copy (4) + // time: [9.0579 ns 9.0854 ns 9.1167 ns] + // get_partial_safe/urbd (4) + // time: [4.6165 ns 4.6203 ns 4.6244 ns] + // get_partial_safe/simd_masked_load (4) + // time: [3.2439 ns 3.2556 ns 3.2746 ns] + // get_partial_safe/portable_simd (4) + // time: [3.3122 ns 3.3192 ns 3.3280 ns] + + use super::*; + use core::arch::aarch64::*; + + pub type State = int8x16_t; + + #[inline(always)] + pub unsafe fn copy(data: *const State, len: usize) -> State { + // Temporary buffer filled with zeros + let mut buffer = [0i8; 16]; + // Copy data into the buffer + core::ptr::copy(data as *const i8, buffer.as_mut_ptr(), len); + // Load the buffer into a 128-bit NEON vector + let partial_vector = vld1q_s8(buffer.as_ptr()); + vaddq_s8(partial_vector, vdupq_n_s8(len as i8)) + } + + #[inline(always)] + pub unsafe fn urbd(data: *const State, len: usize) -> State { + // Stripped of page check for simplicity, might crash program + let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr()); + let mask = vcgtq_s8(vdupq_n_s8(len as i8), indices); + vandq_s8(vld1q_s8(data as *const i8), vreinterpretq_s8_u8(mask)) + } + + #[inline(always)] + pub unsafe fn urbd_asm(data: *const State, len: usize) -> State { + // Stripped of page check for simplicity, might crash program + let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr()); + let mask = vcgtq_s8(vdupq_n_s8(len as i8), indices); + let oob_vector = vld1q_s8(data as *const i8); // asm to do + vandq_s8(oob_vector, vreinterpretq_s8_u8(mask)) + } + + #[inline(always)] + pub unsafe fn simd_masked_load(data: *const State, len: usize) -> State { + let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr()); + let mask = vreinterpretq_s8_u8(vcgtq_s8(vdupq_n_s8(len as i8), indices)); + std::intrinsics::simd::simd_masked_load(mask, data as *const i8, vdupq_n_s8(len as i8)) + } + + #[inline(always)] + pub unsafe fn portable_simd(data: *const State, len: usize) -> State { + let
slice = std::slice::from_raw_parts(data as *const i8, len); + let data: Simd<i8, 16> = Simd::<i8, 16>::load_or_default(&slice); + transmute(data) + } +} + +#[cfg(target_arch = "x86_64")] +mod arch { + use super::*; + use core::arch::x86_64::*; + + pub type State = __m128i; + + #[inline(always)] + pub unsafe fn copy(data: *const State, len: usize) -> State { + // Temporary buffer filled with zeros + let mut buffer = [0i8; 16]; + // Copy data into the buffer + core::ptr::copy(data as *const i8, buffer.as_mut_ptr(), len); + // Load the buffer into a __m128i vector + let partial_vector = _mm_loadu_si128(buffer.as_ptr() as *const State); + _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8)) + } + + #[inline(always)] + pub unsafe fn urbd(data: *const State, len: usize) -> State { + // Stripped of page check for simplicity, might crash program + let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices); + _mm_and_si128(_mm_loadu_si128(data), mask) + } + + #[inline(always)] + pub unsafe fn urbd_asm(data: *const State, len: usize) -> State { + use std::arch::asm; + // Stripped of page check for simplicity, might crash program + let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices); + let mut oob_vector: State; + asm!("movdqu {0}, [{1}]", out(xmm_reg) oob_vector, in(reg) data, options(pure, nomem, nostack)); + _mm_and_si128(oob_vector, mask) + } + + #[inline(always)] + pub unsafe fn simd_masked_load(data: *const State, len: usize) -> State { + let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices); + State::from(std::intrinsics::simd::simd_masked_load(core::simd::i8x16::from(mask), data as *const i8, core::simd::i8x16::from(_mm_set1_epi8(len as i8)))) + } + + #[inline(always)] + pub unsafe fn portable_simd(data: *const State, len: usize) -> State { + let slice = std::slice::from_raw_parts(data as *const i8, len); + let data: Simd<i8, 16> = Simd::<i8, 16>::load_or_default(&slice); + transmute(data) + } +} + +fn benchmark(c: &mut Criterion) { + let mut group = c.benchmark_group("get_partial_safe"); + + // Prepare test data + let test_data: arch::State = unsafe { std::mem::zeroed() }; + + // Benchmark with different lengths + for &len in &[4, 8, 12, 16] { + group.bench_function(format!("copy ({})", len), |b| { + b.iter(|| unsafe { + black_box(arch::copy( + black_box(&test_data as *const arch::State), + black_box(len), + )) + }) + }); + + group.bench_function(format!("urbd ({})", len), |b| { + b.iter(|| unsafe { + black_box(arch::urbd( + black_box(&test_data as *const arch::State), + black_box(len), + )) + }) + }); + + group.bench_function(format!("urbd_asm ({})", len), |b| { + b.iter(|| unsafe { + black_box(arch::urbd_asm( + black_box(&test_data as *const arch::State), + black_box(len), + )) + }) + }); + + group.bench_function(format!("simd_masked_load ({})", len), |b| { + b.iter(|| unsafe { + black_box(arch::simd_masked_load( + black_box(&test_data as *const arch::State), + black_box(len), + )) + }) + }); + + group.bench_function(format!("portable_simd ({})", len), |b| { + b.iter(|| unsafe { + black_box(arch::portable_simd( + black_box(&test_data as *const arch::State), + black_box(len), + )) + }) + }); + } + + group.finish(); +} +criterion_group!(benches, benchmark); +criterion_main!(benches); \ No newline at end of file
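The `urbd*` and `simd_masked_load` variants in the new benchmark all rely on the same trick: compare the lane indices 0..15 against `len` to build a byte mask, then AND it with a full 16-byte load so that lanes at or past `len` are zeroed. For reference, here is a scalar sketch of what the `vcgtq_s8` / `_mm_cmpgt_epi8` comparison computes; it is illustrative only, the benchmark itself uses the intrinsics shown above:

```rust
/// Scalar equivalent of the "len > index" comparison used to build the keep-mask.
/// Lane i is kept (all bits set, i.e. -1) when i < len, and zeroed otherwise.
fn keep_mask(len: usize) -> [i8; 16] {
    let mut mask = [0i8; 16];
    for (i, lane) in mask.iter_mut().enumerate() {
        if (i as i8) < (len as i8) {
            *lane = -1; // all bits set: the loaded byte survives the AND
        }
    }
    mask
}

fn main() {
    assert_eq!(&keep_mask(4)[..6], &[-1, -1, -1, -1, 0, 0]);
}
```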
diff --git a/benches/throughput/aarch64.svg b/benches/throughput/aarch64.svg index bebea7a..3bcf061 100644 [Plot diff not reproduced: the "Throughput (aarch64)" chart is regenerated with the y-axis "Throughput (MiB/s)" switched from linear ticks (0 to 35000) to log-scale ticks (100, 1000, 10000); the x-axis "Input Size (bytes)" ticks (4 to 32768) are unchanged.]
diff --git a/benches/throughput/main.rs b/benches/throughput/main.rs index c84a9cc..cc4566d 100644 --- a/benches/throughput/main.rs +++ b/benches/throughput/main.rs @@ -2,8 +2,8 @@ mod result_processor; use result_processor::*; -use std::hash::Hasher; use std::hint::black_box; +use std::hash::Hasher; use std::time::{Instant, Duration}; use std::alloc::{alloc, dealloc, Layout}; use std::slice; @@ -14,7 +14,6 @@ use gxhash::*; const ITERATIONS: u32 = 1000; const MAX_RUN_DURATION: Duration = Duration::from_millis(1000); -const FORCE_NO_INLINING: bool = false; fn main() { let mut rng = rand::thread_rng(); @@ -49,7 +48,7 @@ fn main() { }); // AHash - let ahash_hasher = ahash::RandomState::with_seeds(0, 0, 0, 0); + let ahash_hasher = ahash::RandomState::with_seed(42); benchmark(processor.as_mut(), slice, "AHash", |data: &[u8], _: i32| -> u64 { ahash_hasher.hash_one(data) }); @@ -91,7 +90,7 @@ fn main() { } fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str, delegate: F) - where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy { processor.on_start(name); for i in 2.. { @@ -101,22 +100,20 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str, } // Warmup - black_box(time(ITERATIONS, &|| delegate(&data[..len], S::default()))); + time(ITERATIONS, &delegate, &data[..len], S::default()); let mut durations_s = vec![]; let now = Instant::now(); while now.elapsed() < MAX_RUN_DURATION { // Make seed unpredictable to prevent optimizations - let seed = S::try_from(now.elapsed().as_nanos()) - .unwrap_or_else(|_| panic!("Something went horribly wrong!")); + let seed = S::try_from(now.elapsed().as_nanos()).unwrap_or_else(|_| panic!()); // Offset slice by an unpredictable amount to prevent optimization (pre caching) // and make the benchmark use both aligned and unaligned data - let start = S::try_into(seed) - .unwrap_or_else(|_| panic!("Something went horribly wrong!")) & 0xFF; + let start = S::try_into(seed).unwrap_or_else(|_| panic!()) & 0xFF; let end = start + len; let slice = &data[start..end]; // Execute method for a new iterations - let duration = time(ITERATIONS, &|| delegate(slice, S::default())); + let duration = time(ITERATIONS, &delegate, slice, seed); durations_s.push(duration.as_secs_f64()); } let average_duration_s = calculate_average_without_outliers(&mut durations_s); @@ -128,31 +125,21 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str, } #[inline(never)] -fn time<F>(iterations: u32, delegate: &F) -> Duration - where F: Fn() -> u64 +fn time<F, S>(iterations: u32, delegate: F, slice: &[u8], seed: S) -> Duration + where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy { let now = Instant::now(); - // Bench the same way to what is done in criterion.rs + // Bench a similar way to what is done in criterion.rs // https://github.com/bheisler/criterion.rs/blob/e1a8c9ab2104fbf2d15f700d0038b2675054a2c8/src/bencher.rs#L87 - for _ in 0..iterations { - if FORCE_NO_INLINING { - black_box(execute_noinlining(delegate)); - } else { - black_box(delegate()); - } + for _ in 0..iterations { + // Black box the result to prevent the compiler from optimizing the operation away + // Black box the slice to prevent the compiler from assuming the slice is constant + // We don't black box the seed because it's likely to be constant in most real-world usage scenarios + black_box(delegate(black_box(slice), seed)); } now.elapsed() } -// Some algorithm are more likely to be inlined than others. -// This puts then all at the same level. But is it fair? -#[inline(never)] -fn execute_noinlining<F>(delegate: &F) -> u64 - where F: Fn() -> u64 -{ - delegate() -} - // Outliers are inevitable, especially on a low number of iterations // To avoid computing a huge number of iterations we can use the interquartile range fn calculate_average_without_outliers(timings: &mut Vec<f64>) -> f64 {
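The body of `calculate_average_without_outliers` is untouched by this patch and only its doc comment appears as context above. For readers who want the idea behind the interquartile-range filtering it mentions, here is a rough sketch; the actual implementation in the repository may differ in its details:

```rust
/// Assumed shape of IQR-based outlier filtering: drop timings outside
/// [Q1 - 1.5*IQR, Q3 + 1.5*IQR], then average whatever remains.
fn average_without_outliers(timings: &mut Vec<f64>) -> f64 {
    timings.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let q1 = timings[timings.len() / 4];
    let q3 = timings[(timings.len() * 3) / 4];
    let iqr = q3 - q1;
    let (low, high) = (q1 - 1.5 * iqr, q3 + 1.5 * iqr);
    let kept: Vec<f64> = timings.iter().copied().filter(|t| *t >= low && *t <= high).collect();
    kept.iter().sum::<f64>() / kept.len() as f64
}
```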
diff --git a/benches/throughput/result_processor.rs b/benches/throughput/result_processor.rs index c7dbbeb..8963cbe 100644 --- a/benches/throughput/result_processor.rs +++ b/benches/throughput/result_processor.rs @@ -132,7 +132,7 @@ impl ResultProcessor for OutputPlot { let x_min = self.series.iter().next().unwrap().1.iter().map(|(x, _)| *x as u32).min().unwrap(); let x_max = self.series.iter().next().unwrap().1.iter().map(|(x, _)| *x as u32).max().unwrap(); - let y_min = 0u32; + let y_min = self.series.iter().flat_map(|inner_map| inner_map.1.iter()).map(|(_, y)| (0.95 * *y) as u32).min().unwrap(); let y_max = self.series.iter().flat_map(|inner_map| inner_map.1.iter()).map(|(_, y)| (1.05 * *y) as u32).max().unwrap(); let mut chart = ChartBuilder::on(&canvas) @@ -144,8 +144,8 @@ impl ResultProcessor for OutputPlot { (x_min..x_max) .log_scale() .with_key_points(self.series.iter().next().unwrap().1.iter().map(|(x, _)| *x as u32).collect::<Vec<u32>>()), - y_min..y_max - //.log_scale(), + (y_min..y_max) + .log_scale(), ).unwrap(); chart
diff --git a/benches/throughput/x86_64-hybrid.svg b/benches/throughput/x86_64-hybrid.svg index ea05a09..8d20c49 100644 [Plot diff not reproduced: the "Throughput (x86_64-hybrid)" chart is regenerated with the y-axis "Throughput (MiB/s)" switched from linear ticks (0 to 160000) to log-scale ticks (100, 1000, 10000, 100000); the x-axis "Input Size (bytes)" ticks (4 to 32768) are unchanged.]
diff --git a/benches/throughput/x86_64.svg b/benches/throughput/x86_64.svg index e306731..5a2fccb 100644 [Plot diff not reproduced: the "Throughput (x86_64)" chart is regenerated with the y-axis "Throughput (MiB/s)" switched from linear ticks (0 to 100000) to log-scale ticks (1000, 10000, 100000); the x-axis "Input Size (bytes)" ticks (4 to 32768) are unchanged.]
diff --git a/benches/throughput_criterion.rs b/benches/throughput_criterion.rs index 5fb7ce7..515610e 100644 --- a/benches/throughput_criterion.rs +++ b/benches/throughput_criterion.rs @@ -4,7 +4,7 @@ use std::slice; use std::hash::Hasher; use criterion::measurement::WallTime; -use criterion::{criterion_group, criterion_main, Criterion, Throughput, PlotConfiguration, AxisScale, BenchmarkGroup, BenchmarkId}; +use criterion::{criterion_group, criterion_main, Criterion, Throughput, PlotConfiguration, AxisScale, BenchmarkGroup, BenchmarkId, black_box}; use
rand::Rng; use gxhash::*; @@ -21,9 +21,9 @@ fn benchmark<F>(c: &mut BenchmarkGroup<WallTime>, data: &[u8], name: &str, deleg c.throughput(Throughput::Bytes(len as u64)); let slice = &data[0..len]; // Aligned - // let slice = &data[1..len]; // Unaligned + //let slice = &data[1..len]; // Unaligned c.bench_with_input(BenchmarkId::new(name, len), slice, |bencher, input| { - bencher.iter(|| delegate(criterion::black_box(input), criterion::black_box(42))) + bencher.iter(|| black_box(delegate(black_box(input), black_box(42)))) }); } }
diff --git a/src/gxhash/platform/arm.rs b/src/gxhash/platform/arm.rs index fc40b92..0aba171 100644 --- a/src/gxhash/platform/arm.rs +++ b/src/gxhash/platform/arm.rs @@ -25,7 +25,7 @@ pub unsafe fn load_unaligned(p: *const State) -> State { vld1q_s8(p as *const i8) } -#[inline(always)] +#[inline(never)] pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State { // Temporary buffer filled with zeros let mut buffer = [0i8; VECTOR_SIZE]; @@ -34,6 +34,37 @@ pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State { // Load the buffer into a __m256i vector let partial_vector = vld1q_s8(buffer.as_ptr()); vaddq_s8(partial_vector, vdupq_n_s8(len as i8)) + + //let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr()); + //let mask = vreinterpretq_s8_u8(vcgtq_s8(vdupq_n_s8(len as i8), indices)); + + // Using simd_masked_load + // State::from(std::intrinsics::simd::simd_masked_load(core::simd::i8x16::from(mask), data as *const i8, core::simd::i8x16::from(vdupq_n_s8(len as i8)))) + // std::intrinsics::simd::simd_masked_load(mask, data as *const i8, vdupq_n_s8(len as i8)) + + // Using std::simd + // use std::simd::*; + // use std::mem::transmute; + // let slice = std::slice::from_raw_parts(data as *const i8, len); + // let data: Simd<i8, 16> = Simd::<i8, 16>::load_or_default(&slice); + // let vector: State = transmute(data); + // return vector; +} + +#[inline(always)] +pub unsafe fn get_partial_unsafe_no_ub(data: *const State, len: usize) -> State { + let indices = vld1q_s8([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15].as_ptr()); + let mask = vcgtq_s8(vdupq_n_s8(len as i8), indices); + use std::arch::asm; + let mut result: State; + asm!( + "ld1 {{v2.16b}}, [{src}]", + src = in(reg) data, out("v2") result, + options(nomem, nostack) + ); + //let result = load_unaligned(data); + let partial_vector = vandq_s8(result, vreinterpretq_s8_u8(mask)); + vaddq_s8(partial_vector, vdupq_n_s8(len as i8)) } #[inline(always)]
diff --git a/src/gxhash/platform/mod.rs b/src/gxhash/platform/mod.rs index f40d676..1c7a185 100644 --- a/src/gxhash/platform/mod.rs +++ b/src/gxhash/platform/mod.rs @@ -18,10 +18,12 @@ const PAGE_SIZE: usize = 0x1000; pub unsafe fn get_partial(p: *const State, len: usize) -> State { // Safety check if check_same_page(p) { - get_partial_unsafe(p, len) + get_partial_unsafe_no_ub(p, len) } else { get_partial_safe(p, len) } + + //get_partial_safe(p, len) } #[inline(always)]
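`check_same_page` itself is not part of this hunk; the point of the branch above is that an unaligned 16-byte load starting at `p` cannot fault as long as it does not cross a 4 KiB page boundary, so the fast out-of-bounds read is only taken in that case. A sketch of such a check, assuming the `PAGE_SIZE` constant visible in this file and a 16-byte vector width (the crate's actual implementation may differ):

```rust
const PAGE_SIZE: usize = 0x1000; // 4 KiB, as declared above this hunk
const VECTOR_SIZE: usize = 16;   // one 128-bit SIMD register

#[inline(always)]
fn check_same_page<T>(p: *const T) -> bool {
    // Offset of the pointer within its page; a full 16-byte read stays on the
    // same page only if it starts early enough before the page boundary.
    let offset_within_page = (p as usize) & (PAGE_SIZE - 1);
    offset_within_page <= PAGE_SIZE - VECTOR_SIZE
}
```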
diff --git a/src/gxhash/platform/x86.rs b/src/gxhash/platform/x86.rs index a5735f1..84eba63 100644 --- a/src/gxhash/platform/x86.rs +++ b/src/gxhash/platform/x86.rs @@ -1,8 +1,8 @@ #[cfg(not(any(all(target_feature = "aes", target_feature = "sse2"), docsrs)))] // docs.rs bypasses the target_feature check compile_error!{"Gxhash requires aes and sse2 intrinsics. Make sure the processor supports it and build with RUSTFLAGS=\"-C target-cpu=native\" or RUSTFLAGS=\"-C target-feature=+aes,+sse2\"."} -#[cfg(all(feature = "hybrid", not(any(target_feature = "aes", target_feature = "vaes", target_feature = "avx2"))))] -compile_error!{"Hybrid feature is only available on x86 processors with avx2 and vaes intrinsics."} +#[cfg(all(feature = "hybrid", not(all(target_feature = "aes", target_feature = "sse2", target_feature = "avx2"))))] +compile_error!{"Hybrid feature is only available on x86 processors with avx2 intrinsics."} #[cfg(target_arch = "x86")] use core::arch::x86::*; @@ -28,22 +28,73 @@ pub unsafe fn load_unaligned(p: *const State) -> State { _mm_loadu_si128(p) } -#[inline(always)] +#[inline(never)] pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State { // Temporary buffer filled with zeros let mut buffer = [0i8; VECTOR_SIZE]; - // Copy data into the buffer core::ptr::copy(data as *const i8, buffer.as_mut_ptr(), len); - // Load the buffer into a __m256i vector let partial_vector = _mm_loadu_si128(buffer.as_ptr() as *const State); _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8)) + + // Using URBD + //get_partial_unsafe(data, len) + + // Using simd_masked_load + // let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + // let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices); + // State::from(std::intrinsics::simd::simd_masked_load(core::simd::i8x16::from(mask), data as *const i8, core::simd::i8x16::from(_mm_set1_epi8(len as i8)))) + + // Using std::simd + // use std::simd::*; + // use std::mem::transmute; + // let slice = std::slice::from_raw_parts(data as *const i8, len); + // let data: Simd<i8, 16> = Simd::<i8, 16>::load_or_default(&slice); + // let vector: State = transmute(data); + // return vector; + + // Using inline assembly to load out-of-bounds + // use std::arch::asm; + // let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + // let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices); + // let mut result: State; + // asm!("movdqu {0}, [{1}]", out(xmm_reg) result, in(reg) data, options(pure, nomem, nostack)); + // let partial_vector = _mm_and_si128(result, mask); + // _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8)) +} + +#[inline(always)] +pub unsafe fn get_partial_unsafe_no_ub(data: *const State, len: usize) -> State { + // Using inline assembly to load out-of-bounds + use std::arch::asm; + let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices); + let mut result: State; + asm!("movdqu {0}, [{1}]", out(xmm_reg) result, in(reg) data, options(pure, nomem, nostack)); + let partial_vector = _mm_and_si128(result, mask); + _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8)) + + // Using simd_masked_load + // let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + // let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices); + // State::from(std::intrinsics::simd::simd_masked_load(core::simd::i8x16::from(mask), data as *const i8, core::simd::i8x16::from(_mm_set1_epi8(len as i8)))) + + // Using std::simd + // use std::simd::*; + // use std::mem::transmute; + // let slice = std::slice::from_raw_parts(data as *const i8, len); + // let data: Simd<i8, 16> = Simd::<i8, 16>::load_or_default(&slice); + // let vector: State = transmute(data); + // return vector; + + //return get_partial_safe(data, len); } #[inline(always)] pub unsafe fn get_partial_unsafe(data: *const
State, len: usize) -> State { let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices); - let partial_vector = _mm_and_si128(_mm_loadu_si128(data), mask); + let d: __m128i = _mm_loadu_si128(data); + let partial_vector = _mm_and_si128(d, mask); _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8)) } diff --git a/src/lib.rs b/src/lib.rs index 705a0bf..d675cf9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![feature(core_intrinsics)] +#![feature(portable_simd)] #![cfg_attr(not(feature = "std"), no_std)] // Hybrid SIMD width usage currently requires unstable 'stdsimd' #![cfg_attr(feature = "hybrid", feature(stdarch_x86_avx512))]