From e7890e09d1945befa6a0b3d5a0b133cf25660031 Mon Sep 17 00:00:00 2001 From: arriqaaq Date: Mon, 6 Jan 2025 13:19:15 +0530 Subject: [PATCH 1/2] feat: add query iterator --- src/art.rs | 55 +++++++++--------- src/iter.rs | 163 ++++++++++++++++++++++++++++------------------------ 2 files changed, 117 insertions(+), 101 deletions(-) diff --git a/src/art.rs b/src/art.rs index dbf49e2..b3ce528 100644 --- a/src/art.rs +++ b/src/art.rs @@ -1713,16 +1713,20 @@ impl Tree { self.size == 0 } - pub fn scan_at_ts(&self, range: R, ts: u64) -> Vec<(Vec, V)> + pub fn scan_at_ts<'a, R>( + &'a self, + range: R, + ts: u64, + ) -> impl Iterator, V)> + 'a where - R: RangeBounds

, + R: RangeBounds

+ 'a, { scan_node(self.root.as_ref(), range, QueryType::LatestByTs(ts)) } - pub fn keys_at_ts(&self, range: R, ts: u64) -> Vec> + pub fn keys_at_ts<'a, R>(&'a self, range: R, ts: u64) -> impl Iterator> + 'a where - R: RangeBounds

, + R: RangeBounds

+ 'a, { query_keys_at_node(self.root.as_ref(), range, QueryType::LatestByTs(ts)) } @@ -3333,7 +3337,7 @@ mod tests { #[test] fn scan_empty_range() { let tree: Tree = Tree::new(); - let result = tree.scan_at_ts(RangeFull {}, 0); + let result: Vec<_> = tree.scan_at_ts(RangeFull {}, 0).collect(); assert!(result.is_empty()); } @@ -3347,7 +3351,7 @@ mod tests { } let range = VariableSizeKey::from_slice("key_1".as_bytes()) ..=VariableSizeKey::from_slice("key_2".as_bytes()); - let values = tree.scan_at_ts(range, 0); + let values: Vec<_> = tree.scan_at_ts(range, 0).collect(); assert_eq!(values.len(), 2); } @@ -3359,7 +3363,7 @@ mod tests { tree.insert(&VariableSizeKey::from_str(key).unwrap(), 1, 0, 0) .unwrap(); } - let values = tree.scan_at_ts(RangeFull {}, 0); + let values: Vec<_> = tree.scan_at_ts(RangeFull {}, 0).collect(); assert_eq!(values.len(), keys.len()); } @@ -3373,7 +3377,7 @@ mod tests { } let range = VariableSizeKey::from_slice("key_4".as_bytes()) ..VariableSizeKey::from_slice("key_5".as_bytes()); - let values = tree.scan_at_ts(range, 0); + let values: Vec<_> = tree.scan_at_ts(range, 0).collect(); assert!(values.is_empty()); } @@ -3391,7 +3395,7 @@ mod tests { .unwrap(); } for (i, _) in keys.iter().enumerate() { - let values = tree.scan_at_ts(RangeFull {}, i as u64); + let values: Vec<_> = tree.scan_at_ts(RangeFull {}, i as u64).collect(); assert_eq!(values.len(), i + 1); } } @@ -3407,7 +3411,7 @@ mod tests { .unwrap(); } - let values = tree.scan_at_ts(RangeFull {}, num_keys); + let values: Vec<_> = tree.scan_at_ts(RangeFull {}, num_keys).collect(); assert_eq!(values.len(), num_keys as usize); // Expect all keys to be visible } @@ -3428,15 +3432,15 @@ mod tests { } // Scan at a timestamp before any insertions - let result_before = tree.scan_at_ts(RangeFull {}, 0); + let result_before: Vec<_> = tree.scan_at_ts(RangeFull {}, 0).collect(); assert!(result_before.is_empty()); // Scan between insertions - let result_mid = tree.scan_at_ts(RangeFull {}, 4); + let result_mid: Vec<_> = tree.scan_at_ts(RangeFull {}, 4).collect(); assert_eq!(result_mid.len(), 2); // Expect first two keys to be visible // Scan after all insertions - let result_after = tree.scan_at_ts(RangeFull {}, 7); + let result_after: Vec<_> = tree.scan_at_ts(RangeFull {}, 7).collect(); assert_eq!(result_after.len(), keys.len()); // Expect all keys to be visible } @@ -3447,8 +3451,7 @@ mod tests { tree.insert(&VariableSizeKey::from_str("key_1").unwrap(), 42, 0, 0) .unwrap(); - let values = tree.scan_at_ts(RangeFull {}, 0); - + let values: Vec<_> = tree.scan_at_ts(RangeFull {}, 0).collect(); assert_eq!(values.len(), 1); assert_eq!(values[0].1, 42); } @@ -3456,7 +3459,7 @@ mod tests { #[test] fn keys_at_empty_range() { let tree: Tree = Tree::new(); - let keys = tree.keys_at_ts(RangeFull {}, 0); + let keys: Vec<_> = tree.keys_at_ts(RangeFull {}, 0).collect(); assert!(keys.is_empty()); } @@ -3470,7 +3473,7 @@ mod tests { } let range = VariableSizeKey::from_slice("key_1".as_bytes()) ..=VariableSizeKey::from_slice("key_2".as_bytes()); - let keys = tree.keys_at_ts(range, 0); + let keys: Vec<_> = tree.keys_at_ts(range, 0).collect(); assert_eq!(keys.len(), 2); } @@ -3482,7 +3485,7 @@ mod tests { tree.insert(&VariableSizeKey::from_str(key).unwrap(), 1, 0, 0) .unwrap(); } - let keys = tree.keys_at_ts(RangeFull {}, 0); + let keys: Vec<_> = tree.keys_at_ts(RangeFull {}, 0).collect(); assert_eq!(keys.len(), keys_to_insert.len()); } @@ -3496,7 +3499,7 @@ mod tests { } let range = VariableSizeKey::from("key_4".as_bytes().to_vec()) ..VariableSizeKey::from("key_5".as_bytes().to_vec()); - let keys = tree.keys_at_ts(range, 0); + let keys: Vec<_> = tree.keys_at_ts(range, 0).collect(); assert!(keys.is_empty()); } @@ -3514,7 +3517,7 @@ mod tests { .unwrap(); } for (i, _) in keys_to_insert.iter().enumerate() { - let keys = tree.keys_at_ts(RangeFull {}, i as u64); + let keys: Vec<_> = tree.keys_at_ts(RangeFull {}, i as u64).collect(); assert_eq!(keys.len(), i + 1); } } @@ -3531,7 +3534,7 @@ mod tests { .unwrap(); } - let keys = tree.keys_at_ts(RangeFull {}, num_keys); + let keys: Vec<_> = tree.keys_at_ts(RangeFull {}, num_keys).collect(); assert_eq!(keys.len(), num_keys as usize); // Expect all keys to be visible // Sort the expected keys lexicographically @@ -3539,7 +3542,7 @@ mod tests { // Verify each key is proper for (expected_key, key) in expected_keys.iter().zip(keys.iter()) { - assert_eq!(key, expected_key.to_slice()); + assert_eq!(key.as_ref(), expected_key.to_slice()); } } @@ -3558,13 +3561,13 @@ mod tests { .unwrap(); } - let keys_before = tree.keys_at_ts(RangeFull {}, 0); + let keys_before: Vec<_> = tree.keys_at_ts(RangeFull {}, 0).collect(); assert!(keys_before.is_empty()); - let keys_mid = tree.keys_at_ts(RangeFull {}, 4); + let keys_mid: Vec<_> = tree.keys_at_ts(RangeFull {}, 4).collect(); assert_eq!(keys_mid.len(), 2); // Expect first two keys to be visible - let keys_after = tree.keys_at_ts(RangeFull {}, 7); + let keys_after: Vec<_> = tree.keys_at_ts(RangeFull {}, 7).collect(); assert_eq!(keys_after.len(), keys_to_insert.len()); // Expect all keys to be visible } @@ -3574,7 +3577,7 @@ mod tests { tree.insert(&VariableSizeKey::from_str("key_1").unwrap(), 42, 0, 0) .unwrap(); - let keys = tree.keys_at_ts(RangeFull {}, 0); + let keys: Vec<_> = tree.keys_at_ts(RangeFull {}, 0).collect(); assert_eq!(keys.len(), 1); } diff --git a/src/iter.rs b/src/iter.rs index 38dbf63..0a01971 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -534,109 +534,122 @@ where within_start_bound && within_end_bound } -pub(crate) fn scan_node( - node: Option<&Arc>>, +pub(crate) fn scan_node<'a, K, V, R>( + node: Option<&'a Arc>>, range: R, query_type: QueryType, -) -> Vec<(Vec, V)> +) -> impl Iterator, V)> + 'a where - K: KeyTrait, + K: KeyTrait + 'a, V: Clone, - R: RangeBounds, + R: RangeBounds + 'a, { - iterate(node, range, query_type, true) - .into_iter() - .filter_map(|(k, v_opt)| v_opt.map(|v| (k, v))) - .collect() + QueryIterator::new(node, range, query_type, true).filter_map(|(k, v_opt)| v_opt.map(|v| (k, v))) } -pub(crate) fn query_keys_at_node( - node: Option<&Arc>>, +pub(crate) fn query_keys_at_node<'a, K, V, R>( + node: Option<&'a Arc>>, range: R, query_type: QueryType, -) -> Vec> +) -> impl Iterator> + 'a where - K: KeyTrait, + K: KeyTrait + 'a, V: Clone, - R: RangeBounds, + R: RangeBounds + 'a, { - iterate(node, range, query_type, false) - .into_iter() - .map(|(k, _)| k) - .collect() + QueryIterator::new(node, range, query_type, false).map(|(k, _)| k) } -fn iterate( - node: Option<&Arc>>, +pub(crate) struct QueryIterator<'a, K: KeyTrait, V: Clone, R: RangeBounds> { + forward: ForwardIterState<'a, K, V>, + prefix: Vec, + prefix_lengths: Vec, range: R, query_type: QueryType, include_values: bool, -) -> Vec<(Vec, Option)> -where - K: KeyTrait, - V: Clone, - R: RangeBounds, -{ - let mut results = Vec::new(); - let mut forward = node.map_or_else(ForwardIterState::empty, |n| { - ForwardIterState::scan_at(n, &range, query_type) - }); - - let mut prefix = forward.prefix.clone(); - let mut prefix_lengths = Vec::new(); - - while let Some(node) = forward.iters.last_mut() { - let e = node.next(); - match e { - Some(other) => { - if let NodeType::Twig(twig) = &other.node_type { - if range.contains(&twig.key) { - // Iterate through leaves of the twig - if let Some(leaf) = twig.get_leaf_by_query(query_type) { - let key = twig.key.as_slice().to_vec(); - if include_values { - results.push((key, Some(leaf.value.clone()))); - } else { - results.push((key, None)); +} + +impl<'a, K: KeyTrait, V: Clone, R: RangeBounds> QueryIterator<'a, K, V, R> { + pub(crate) fn new( + node: Option<&'a Arc>>, + range: R, + query_type: QueryType, + include_values: bool, + ) -> Self { + let forward = node.map_or_else(ForwardIterState::empty, |n| { + ForwardIterState::scan_at(n, &range, query_type) + }); + let prefix = forward.prefix.clone(); + + Self { + forward, + prefix, + prefix_lengths: Vec::new(), + range, + query_type, + include_values, + } + } +} + +impl<'a, K: KeyTrait, V: Clone, R: RangeBounds> Iterator for QueryIterator<'a, K, V, R> { + type Item = (Box<[u8]>, Option); + + fn next(&mut self) -> Option { + // First try to get item from the current node iteration + while let Some(node) = self.forward.iters.last_mut() { + match node.next() { + Some(other) => { + if let NodeType::Twig(twig) = &other.node_type { + if self.range.contains(&twig.key) { + if let Some(leaf) = twig.get_leaf_by_query(self.query_type) { + let key = twig.key.as_slice(); + let value = if self.include_values { + Some(leaf.value.clone()) + } else { + None + }; + return Some((Box::from(key), value)); } + } else if is_key_out_of_range(&self.range, &twig.key) { + // stop iteration if the range end is exceeded + self.forward.iters.clear(); + return None; } - } else if is_key_out_of_range(&range, &twig.key) { - // stop iteration if the range end is exceeded - forward.iters.clear() + } else { + handle_non_twig_node( + &mut self.prefix, + &mut self.prefix_lengths, + &self.range, + other, + &mut self.forward.iters, + ); } - } else { - handle_non_twig_node( - &mut prefix, - &mut prefix_lengths, - &range, - other, - &mut forward.iters, - ); } - } - None => { - // Pop the iterator if no more elements - forward.iters.pop(); - // Restore the prefix to its previous state - if let Some(prefix_len_before) = prefix_lengths.pop() { - prefix.truncate(prefix_len_before); + None => { + // Pop the iterator if no more elements + self.forward.iters.pop(); + // Restore the prefix to its previous state + if let Some(prefix_len_before) = self.prefix_lengths.pop() { + self.prefix.truncate(prefix_len_before); + } } } } - } - // Iterate over all leafs in forward.leafs and append them to results - while let Some(leaf) = forward.leafs.pop_front() { - let key = leaf.0.as_slice().to_vec(); - let value = if include_values { - Some(leaf.1.value.clone()) + // If no more nodes to iterate, try the leaf queue + if let Some(leaf) = self.forward.leafs.pop_front() { + let key = leaf.0.as_slice(); + let value = if self.include_values { + Some(leaf.1.value.clone()) + } else { + None + }; + Some((Box::from(key), value)) } else { None - }; - results.push((key, value)); + } } - - results } #[cfg(test)] From c0a2154940aff9eab44edf65faeca16be180a88f Mon Sep 17 00:00:00 2001 From: arriqaaq Date: Mon, 6 Jan 2025 15:49:12 +0530 Subject: [PATCH 2/2] chore: dont use box for keys --- src/art.rs | 4 ++-- src/iter.rs | 27 ++++++++++++--------------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/art.rs b/src/art.rs index b3ce528..b21fda4 100644 --- a/src/art.rs +++ b/src/art.rs @@ -1592,7 +1592,7 @@ impl Tree { pub fn range<'a, R>( &'a self, range: R, - ) -> impl Iterator, &'a V, &'a u64, &'a u64)> + ) -> impl Iterator where R: RangeBounds

+ 'a, { @@ -1621,7 +1621,7 @@ impl Tree { pub fn range_with_versions<'a, R>( &'a self, range: R, - ) -> impl Iterator, &'a V, &'a u64, &'a u64)> + ) -> impl Iterator where R: RangeBounds

+ 'a, { diff --git a/src/iter.rs b/src/iter.rs index 0a01971..f87db0e 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -413,7 +413,7 @@ where } impl<'a, K: 'a + KeyTrait, V: Clone, R: RangeBounds> Iterator for Range<'a, K, V, R> { - type Item = (Box<[u8]>, &'a V, &'a u64, &'a u64); + type Item = (&'a [u8], &'a V, &'a u64, &'a u64); #[inline] fn next(&mut self) -> Option { @@ -446,7 +446,7 @@ impl<'a, K: 'a + KeyTrait, V: Clone, R: RangeBounds> Iterator for Range<'a, K self.forward.leafs.pop_front().map(|leaf| { ( - Box::from(leaf.0.as_slice()), + leaf.0.as_slice(), &leaf.1.value, &leaf.1.version, &leaf.1.ts, @@ -592,7 +592,7 @@ impl<'a, K: KeyTrait, V: Clone, R: RangeBounds> QueryIterator<'a, K, V, R> { } } -impl<'a, K: KeyTrait, V: Clone, R: RangeBounds> Iterator for QueryIterator<'a, K, V, R> { +impl> Iterator for QueryIterator<'_, K, V, R> { type Item = (Box<[u8]>, Option); fn next(&mut self) -> Option { @@ -1036,13 +1036,13 @@ mod tests { let results: Vec<_> = trie.range(range).collect(); let expected = vec![ - (Box::from(&b"blackberry"[..]), &4, &4, &0), - (Box::from(&b"blueberry"[..]), &5, &5, &0), - (Box::from(&b"cherry"[..]), &6, &6, &0), - (Box::from(&b"date"[..]), &7, &7, &0), - (Box::from(&b"fig"[..]), &8, &8, &0), - (Box::from(&b"grape"[..]), &9, &9, &0), - (Box::from(&b"kiwi"[..]), &10, &10, &0), + (&b"blackberry"[..], &4, &4, &0), + (&b"blueberry"[..], &5, &5, &0), + (&b"cherry"[..], &6, &6, &0), + (&b"date"[..], &7, &7, &0), + (&b"fig"[..], &8, &8, &0), + (&b"grape"[..], &9, &9, &0), + (&b"kiwi"[..], &10, &10, &0), ]; assert_eq!(results, expected); @@ -1082,13 +1082,10 @@ mod tests { let btree_range = Box::from(&b"berry"[..])..=Box::from(&b"kiwi"[..]); let btree_results: Vec<_> = btree .range(btree_range) - .map(|(k, v)| (k.clone(), *v)) + .map(|(k, v)| (k.as_ref(), *v)) .collect(); - let trie_expected: Vec<_> = trie_results - .iter() - .map(|(k, v, _, _)| (k.clone(), **v)) - .collect(); + let trie_expected: Vec<_> = trie_results.iter().map(|(k, v, _, _)| (*k, **v)).collect(); assert_eq!(trie_expected, btree_results); }