Skip to content

Commit 6170d36

Browse files
committed
Tie captures to the Haystack directly, not via Re
1 parent f57313a commit 6170d36

File tree

7 files changed

+143
-160
lines changed

7 files changed

+143
-160
lines changed

src/re.rs

Lines changed: 33 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,7 @@ pub trait Re: Sized {
1717
/// The haystack that can be searched by this [Re] using the
1818
/// [is_match_between][Re::is_match_between] and [captures_between][Re::captures_between]
1919
/// methods.
20-
type Haystack: Haystack<Slice = Self::Slice> + ?Sized;
21-
22-
/// The slice type of the associated [Haystack] that is returned by [Captures] methods when
23-
/// extracting matches and submatches.
24-
type Slice: Writable + ?Sized;
20+
type Haystack<'h>: Haystack + 'h;
2521

2622
/// Attempt to compile the given regular expression for use inside of a [Structex][crate::Structex].
2723
///
@@ -43,7 +39,7 @@ pub trait Re: Sized {
4339
/// This does not need to search for the leftmost-longest match and where possible should be
4440
/// faster to run than [Re::captures_between] which needs to extract the position of the match
4541
/// itself and all submatches.
46-
fn is_match_between(&self, haystack: &Self::Haystack, from: usize, to: usize) -> bool;
42+
fn is_match_between(&self, haystack: Self::Haystack<'_>, from: usize, to: usize) -> bool;
4743

4844
/// Searches for the first match of this regex between the given byte offsets in the given
4945
/// haystack, returning the overall match along with the matches of each capture group in the
@@ -52,27 +48,29 @@ pub trait Re: Sized {
5248
/// See [RawCaptures::new] for requirements around constructing the return type.
5349
fn captures_between(
5450
&self,
55-
haystack: &Self::Haystack,
51+
haystack: Self::Haystack<'_>,
5652
from: usize,
5753
to: usize,
5854
) -> Option<RawCaptures>;
5955
}
6056

6157
/// A haystack is an associated type on [Re] that the regular expression engine can be run against.
6258
///
63-
/// Typically this is a [str] but some engines may support richer types in order to provide
59+
/// Typically this is a [&str] but some engines may support richer types in order to provide
6460
/// searching of streams or discontiguous inputs.
65-
pub trait Haystack: Writable + fmt::Debug + PartialEq + Eq + Sync {
61+
pub trait Haystack: Writable + fmt::Debug + PartialEq + Eq + Copy {
6662
/// The output of the [slice][Haystack::slice] method.
6763
///
6864
/// Typically the same type as the haystack itself but not required to be so.
69-
type Slice: Writable + Sync + ?Sized;
65+
type Slice<'h>: Writable
66+
where
67+
Self: 'h;
7068

7169
/// A contiguous sub-section of the haystack between the given byte offsets.
7270
///
7371
/// The given byte offsets form a half-open interval, inclusive of `from` but omitting `to`.
7472
/// This is the same semantics as a normal Rust range `from..to`.
75-
fn slice(&self, from: usize, to: usize) -> &Self::Slice;
73+
fn slice<'h>(&'h self, from: usize, to: usize) -> Self::Slice<'h>;
7674

7775
/// The maximum length of the full haystack in bytes.
7876
///
@@ -90,31 +88,13 @@ pub trait Writable {
9088
W: io::Write;
9189
}
9290

93-
impl Haystack for str {
94-
type Slice = str;
95-
96-
fn slice(&self, from: usize, to: usize) -> &Self::Slice {
97-
&self[from..to]
98-
}
99-
100-
fn max_len(&self) -> usize {
101-
self.len()
102-
}
103-
}
104-
105-
impl Writable for str {
106-
fn write_to<W>(&self, w: &mut W) -> io::Result<usize>
91+
impl Haystack for &str {
92+
type Slice<'h>
93+
= &'h str
10794
where
108-
W: io::Write,
109-
{
110-
w.write_all(self.as_bytes()).map(|_| self.len())
111-
}
112-
}
95+
Self: 'h;
11396

114-
impl Haystack for String {
115-
type Slice = str;
116-
117-
fn slice(&self, from: usize, to: usize) -> &Self::Slice {
97+
fn slice(&self, from: usize, to: usize) -> &str {
11898
&self[from..to]
11999
}
120100

@@ -123,7 +103,7 @@ impl Haystack for String {
123103
}
124104
}
125105

126-
impl Writable for String {
106+
impl Writable for &str {
127107
fn write_to<W>(&self, w: &mut W) -> io::Result<usize>
128108
where
129109
W: io::Write,
@@ -132,31 +112,13 @@ impl Writable for String {
132112
}
133113
}
134114

135-
impl Haystack for [u8] {
136-
type Slice = [u8];
137-
138-
fn slice(&self, from: usize, to: usize) -> &Self::Slice {
139-
&self[from..to]
140-
}
141-
142-
fn max_len(&self) -> usize {
143-
self.len()
144-
}
145-
}
146-
147-
impl Writable for [u8] {
148-
fn write_to<W>(&self, w: &mut W) -> io::Result<usize>
115+
impl Haystack for &[u8] {
116+
type Slice<'h>
117+
= &'h [u8]
149118
where
150-
W: io::Write,
151-
{
152-
w.write_all(self).map(|_| self.len())
153-
}
154-
}
155-
156-
impl Haystack for Vec<u8> {
157-
type Slice = [u8];
119+
Self: 'h;
158120

159-
fn slice(&self, from: usize, to: usize) -> &Self::Slice {
121+
fn slice(&self, from: usize, to: usize) -> &[u8] {
160122
&self[from..to]
161123
}
162124

@@ -165,7 +127,7 @@ impl Haystack for Vec<u8> {
165127
}
166128
}
167129

168-
impl Writable for Vec<u8> {
130+
impl Writable for &[u8] {
169131
fn write_to<W>(&self, w: &mut W) -> io::Result<usize>
170132
where
171133
W: io::Write,
@@ -212,19 +174,19 @@ impl RawCaptures {
212174
/// Represents the capture group positions for a single [Re] match in terms of byte offsets into
213175
/// the original haystack that the match was run against.
214176
#[derive(Debug, PartialEq, Eq)]
215-
pub struct Captures<'h, R>
177+
pub struct Captures<H>
216178
where
217-
R: Re,
179+
H: Haystack,
218180
{
219-
haystack: &'h R::Haystack,
181+
haystack: H,
220182
caps: Vec<Option<(usize, usize)>>,
221183
}
222184

223-
impl<'h, R> Captures<'h, R>
185+
impl<H> Captures<H>
224186
where
225-
R: Re,
187+
H: Haystack,
226188
{
227-
pub(crate) fn new(haystack: &'h R::Haystack, caps: Vec<Option<(usize, usize)>>) -> Self {
189+
pub(crate) fn new(haystack: H, caps: Vec<Option<(usize, usize)>>) -> Self {
228190
Self { haystack, caps }
229191
}
230192

@@ -265,21 +227,21 @@ where
265227
}
266228

267229
/// The full text of the match in the original haystack.
268-
pub fn match_text(&self) -> &'h R::Slice {
230+
pub fn match_text(&self) -> H::Slice<'_> {
269231
let (from, to) = self.get_match();
270232

271233
self.haystack.slice(from, to)
272234
}
273235

274236
/// The full text of the submatch, if present, in the original haystack.
275-
pub fn submatch_text(&self, n: usize) -> Option<&'h R::Slice> {
237+
pub fn submatch_text(&self, n: usize) -> Option<H::Slice<'_>> {
276238
let (from, to) = self.get(n)?;
277239

278240
Some(self.haystack.slice(from, to))
279241
}
280242

281243
/// Iterate over all submatches starting with the full match.
282-
pub fn iter_submatches(&'h self) -> impl Iterator<Item = Option<&'h R::Slice>> {
244+
pub fn iter_submatches(&self) -> impl Iterator<Item = Option<H::Slice<'_>>> {
283245
self.caps
284246
.iter()
285247
.map(|cap| cap.map(|(from, to)| self.haystack.slice(from, to)))
@@ -289,20 +251,19 @@ where
289251
#[cfg(feature = "regex")]
290252
impl Re for regex::Regex {
291253
type CompileError = regex::Error;
292-
type Haystack = str;
293-
type Slice = str;
254+
type Haystack<'h> = &'h str;
294255

295256
fn compile(re: &str) -> Result<Self, Self::CompileError> {
296257
regex::RegexBuilder::new(re).multi_line(true).build()
297258
}
298259

299-
fn is_match_between(&self, haystack: &Self::Haystack, from: usize, to: usize) -> bool {
260+
fn is_match_between(&self, haystack: Self::Haystack<'_>, from: usize, to: usize) -> bool {
300261
self.is_match(&haystack[from..to])
301262
}
302263

303264
fn captures_between(
304265
&self,
305-
haystack: &Self::Haystack,
266+
haystack: Self::Haystack<'_>,
306267
from: usize,
307268
to: usize,
308269
) -> Option<RawCaptures> {

src/se/extract.rs

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::{
22
compile::Inst,
3-
re::{RawCaptures, Re},
3+
re::{Haystack, RawCaptures, Re},
44
se::{Dot, Inner, MatchesInner, TaggedCaptures},
55
};
66
use std::sync::Arc;
@@ -43,31 +43,33 @@ impl Extract {
4343
}
4444
}
4545

46-
pub(super) struct Iter<'h, R>
46+
pub(super) struct Iter<'s, 'h, R, H>
4747
where
48-
R: Re,
48+
R: Re<Haystack<'h> = H>,
49+
H: Haystack,
4950
{
50-
haystack: &'h R::Haystack,
51-
ext: &'h Extract,
51+
haystack: R::Haystack<'h>,
52+
ext: &'s Extract,
5253
inner: Arc<Inner<R>>,
5354
/// The original parent dot we are extracting from
5455
parent: Dot,
5556
/// The child branch we are currently iterating over
56-
child: Option<Box<MatchesInner<'h, R>>>,
57+
child: Option<Box<MatchesInner<'s, 'h, R, H>>>,
5758
/// The current match
5859
held: Option<RawCaptures>,
5960
/// The current byte offset we are up to
6061
pos: usize,
6162
}
6263

63-
impl<'h, R> Iter<'h, R>
64+
impl<'s, 'h, R, H> Iter<'s, 'h, R, H>
6465
where
65-
R: Re,
66+
R: Re<Haystack<'h> = H>,
67+
H: Haystack,
6668
{
6769
pub fn new(
68-
haystack: &'h R::Haystack,
70+
haystack: R::Haystack<'h>,
6971
parent: Dot,
70-
ext: &'h Extract,
72+
ext: &'s Extract,
7173
inner: Arc<Inner<R>>,
7274
) -> Self {
7375
let pos = parent.from();
@@ -100,11 +102,12 @@ where
100102
}
101103
}
102104

103-
impl<'h, R> Iterator for Iter<'h, R>
105+
impl<'s, 'h, R, H> Iterator for Iter<'s, 'h, R, H>
104106
where
105-
R: Re,
107+
R: Re<Haystack<'h> = H>,
108+
H: Haystack,
106109
{
107-
type Item = TaggedCaptures<'h, R>;
110+
type Item = TaggedCaptures<H>;
108111

109112
fn next(&mut self) -> Option<Self::Item> {
110113
loop {

src/se/guard.rs

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::{
22
compile::Inst,
3-
re::Re,
3+
re::{Haystack, Re},
44
se::{Dot, Inner, MatchesInner},
55
};
66
use std::sync::Arc;
@@ -42,14 +42,15 @@ impl Guard {
4242
Some(self)
4343
}
4444

45-
pub(super) fn apply<'h, R>(
46-
&'h self,
47-
haystack: &'h R::Haystack,
45+
pub(super) fn apply<'s, 'h, R, H>(
46+
&'s self,
47+
haystack: R::Haystack<'h>,
4848
dot: Dot,
4949
inner: Arc<Inner<R>>,
50-
) -> Option<MatchesInner<'h, R>>
50+
) -> Option<MatchesInner<'s, 'h, R, H>>
5151
where
52-
R: Re,
52+
R: Re<Haystack<'h> = H>,
53+
H: Haystack,
5354
{
5455
let (from, to) = dot.loc();
5556
let is_match = inner.re[self.re].is_match_between(haystack, from, to);

0 commit comments

Comments
 (0)