Skip to content

Commit 9043aaf

Browse files
authored
Improve varlen encoding of BufferBackend (#64)
* Speed up decoding of var_usize
  - resolve: improves by 32%
  - iter: improves by 286%
  Other benchmarks are not significantly affected.
* Restructure BufferBackend Iter
* Improve naming parity between resolve_index_to_str methods
* Rename locals
* Add #[inline] to some BufferBackend internals
* Revert "add #[inline] to some BufferBackend internals" (This reverts commit 596bfcc.)
1 parent 143eb51 commit 9043aaf

File tree

1 file changed

+50
-19
lines changed

1 file changed

+50
-19
lines changed

src/backend/buffer.rs

+50-19
Original file line numberDiff line numberDiff line change
@@ -110,15 +110,15 @@ where
110110
unsafe fn resolve_index_to_str_unchecked(&self, index: usize) -> &str {
111111
// SAFETY: The function is marked unsafe so that the caller guarantees
112112
// that required invariants are checked.
113-
let slice_len = unsafe { self.buffer.get_unchecked(index..) };
113+
let bytes = unsafe { self.buffer.get_unchecked(index..) };
114114
// SAFETY: The function is marked unsafe so that the caller guarantees
115115
// that required invariants are checked.
116-
let (str_len, str_len_bytes) = unsafe { decode_var_usize_unchecked(slice_len) };
117-
let start_str = index + str_len_bytes;
116+
let (str_len, str_len_bytes) = unsafe { decode_var_usize_unchecked(bytes) };
117+
let index_str = index + str_len_bytes;
118118
let str_bytes =
119119
// SAFETY: The function is marked unsafe so that the caller guarantees
120120
// that required invariants are checked.
121-
unsafe { self.buffer.get_unchecked(start_str..start_str + str_len) };
121+
unsafe { self.buffer.get_unchecked(index_str..index_str + str_len) };
122122
// SAFETY: It is guaranteed by the backend that only valid strings
123123
// are stored in this portion of the buffer.
124124
unsafe { str::from_utf8_unchecked(str_bytes) }
@@ -227,9 +227,26 @@ fn encode_var_usize(buffer: &mut Vec<u8>, mut value: usize) -> usize {
227227
#[inline]
228228
unsafe fn decode_var_usize_unchecked(buffer: &[u8]) -> (usize, usize) {
229229
let first = unsafe { *buffer.get_unchecked(0) };
230-
if first <= 0x7F_u8 {
231-
return (first as usize, 1);
230+
match first {
231+
byte if byte <= 0x7F_u8 => (byte as usize, 1),
232+
_ => unsafe { decode_var_usize_unchecked_cold(buffer) },
232233
}
234+
}
235+
236+
/// Decodes a variable-length encoded `usize` from the buffer.
237+
///
238+
/// Returns the decoded value as first return value.
239+
/// Returns the number of decoded bytes as second return value.
240+
///
241+
/// # Safety
242+
///
243+
/// The caller has to make sure that the buffer contains the necessary
244+
/// bytes needed to properly decode a valid `usize` value.
245+
///
246+
/// Uncommon case: taken when the first byte exceeds `0x7F`, i.e. for string lengths of 128 or greater.
247+
#[inline]
248+
#[cold]
249+
unsafe fn decode_var_usize_unchecked_cold(buffer: &[u8]) -> (usize, usize) {
233250
let mut result: usize = 0;
234251
let mut i = 0;
235252
loop {
@@ -248,11 +265,24 @@ unsafe fn decode_var_usize_unchecked(buffer: &[u8]) -> (usize, usize) {
248265
///
249266
/// Returns the decoded value as first return value.
250267
/// Returns the number of decoded bytes as second return value.
268+
#[inline]
251269
fn decode_var_usize(buffer: &[u8]) -> Option<(usize, usize)> {
252-
if !buffer.is_empty() && buffer[0] <= 0x7F_u8 {
253-
// Shortcut the common case for low values.
254-
return Some((buffer[0] as usize, 1));
270+
match buffer.first() {
271+
None => None,
272+
Some(&byte) if byte <= 0x7F_u8 => Some((byte as usize, 1)),
273+
_ => decode_var_usize_cold(buffer),
255274
}
275+
}
276+
277+
/// Decodes a variable-length encoded `usize` from the buffer.
278+
///
279+
/// Returns the decoded value as first return value.
280+
/// Returns the number of decoded bytes as second return value.
281+
///
282+
/// Uncommon case: taken when the first byte exceeds `0x7F`, i.e. for string lengths of 128 or greater.
283+
#[inline]
284+
#[cold]
285+
fn decode_var_usize_cold(buffer: &[u8]) -> Option<(usize, usize)> {
256286
let mut result: usize = 0;
257287
let mut i = 0;
258288
loop {
@@ -410,17 +440,17 @@ where
410440

411441
pub struct Iter<'a, S> {
412442
backend: &'a BufferBackend<S>,
413-
yielded: usize,
414-
current: usize,
443+
remaining: usize,
444+
next: usize,
415445
}
416446

417447
impl<'a, S> Iter<'a, S> {
418448
#[cfg_attr(feature = "inline-more", inline)]
419449
pub fn new(backend: &'a BufferBackend<S>) -> Self {
420450
Self {
421451
backend,
422-
yielded: 0,
423-
current: 0,
452+
remaining: backend.len_strings,
453+
next: 0,
424454
}
425455
}
426456
}
@@ -440,11 +470,11 @@ where
440470
#[inline]
441471
fn next(&mut self) -> Option<Self::Item> {
442472
self.backend
443-
.resolve_index_to_str(self.current)
444-
.and_then(|(string, next_string_index)| {
445-
let symbol = S::try_from_usize(self.current)?;
446-
self.current = next_string_index;
447-
self.yielded += 1;
473+
.resolve_index_to_str(self.next)
474+
.and_then(|(string, next)| {
475+
let symbol = S::try_from_usize(self.next)?;
476+
self.next = next;
477+
self.remaining -= 1;
448478
Some((symbol, string))
449479
})
450480
}
@@ -454,7 +484,8 @@ impl<'a, S> ExactSizeIterator for Iter<'a, S>
454484
where
455485
S: Symbol,
456486
{
487+
#[inline]
457488
fn len(&self) -> usize {
458-
self.backend.len_strings - self.yielded
489+
self.remaining
459490
}
460491
}

0 commit comments

Comments
 (0)