Skip to content

Commit

Permalink
Add the "teddy" algorithm from aho-corasick
Browse files Browse the repository at this point in the history
Per suggestion from @BurntSushi [here](tafia/quick-xml#664 (comment))

On my M1, it appears to be slower than, but competitive with, memchr up to memchr3,
then starts being the fastest option from 5-16
  • Loading branch information
Dr-Emann committed Oct 16, 2023
1 parent 3effb40 commit 2b65380
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ benchmarks = []
pattern = []

[dev-dependencies]
aho-corasick = "1.1.0"
proptest = "1.0.0"
lazy_static = "1.0.0"
region = "3.0.0"
Expand Down
20 changes: 20 additions & 0 deletions benches/benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ fn spaces(c: &mut Criterion) {
group.bench_function("stdlib_iter_position", |b| {
b.iter(|| haystack.bytes().position(|c| c == b' '));
});
group.bench_function("teddy", |b| {
let searcher = aho_corasick::packed::Searcher::new([" "]).unwrap();
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
});
group.bench_function("memchr", |b| {
b.iter(|| memchr::memchr(b' ', haystack.as_bytes()));
});
Expand Down Expand Up @@ -69,6 +73,10 @@ fn xml3(c: &mut Criterion) {
.position(|c| c == b'<' || c == b'>' || c == b'&')
});
});
group.bench_function("teddy", |b| {
let searcher = aho_corasick::packed::Searcher::new(["<", ">", "&"]).unwrap();
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
});
group.bench_function("memchr", |b| {
b.iter(|| memchr::memchr3(b'<', b'>', b'&', haystack.as_bytes()));
});
Expand Down Expand Up @@ -98,6 +106,10 @@ fn xml5(c: &mut Criterion) {
.position(|c| c == b'<' || c == b'>' || c == b'&' || c == b'\'' || c == b'"')
});
});
group.bench_function("teddy", |b| {
let searcher = aho_corasick::packed::Searcher::new(["<", ">", "&", "'", "\""]).unwrap();
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
});
group.bench_function("memchr", |b| {
b.iter(|| {
let bytes = haystack.as_bytes();
Expand Down Expand Up @@ -174,6 +186,10 @@ fn big_16(c: &mut Criterion) {
})
});
});
group.bench_function("teddy", |b| {
let searcher = aho_corasick::packed::Searcher::new(b"ABCDEFGHIJKLMNOP".iter().map(|b| std::array::from_ref(b))).unwrap();
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
});
group.bench_function("memchr", |b| {
b.iter(|| {
let bytes = haystack.as_bytes();
Expand Down Expand Up @@ -253,6 +269,10 @@ fn big_16(c: &mut Criterion) {
})
});
});
group.bench_function("teddy", |b| {
let searcher = aho_corasick::packed::Searcher::new(b"ABCDEFGHIJKLMNOP".iter().map(|b| std::array::from_ref(b))).unwrap();
b.iter(|| searcher.find(&haystack).map(|m| m.start()));
});
group.bench_function("memchr", |b| {
b.iter(|| {
let bytes = haystack.as_bytes();
Expand Down

0 comments on commit 2b65380

Please sign in to comment.