Skip to content

Commit c5b798d

Browse files
committed
templates: add string.match function
This allows for any matcher type and allows extracting a capture group by number.
1 parent ee202ea commit c5b798d

File tree

4 files changed

+104
-0
lines changed

4 files changed

+104
-0
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
5757
* Glob patterns now support `{foo,bar}` syntax. There may be subtle behavior
5858
changes as we use the [globset](https://crates.io/crates/globset) library now.
5959

60+
* The same string pattern system as in revsets is now supported in the
61+
template language, and is exposed as `string.match(pattern)`.
62+
6063
### Fixed bugs
6164

6265
### Packaging changes

cli/src/template_builder.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,28 @@ fn builtin_string_methods<'a, L: TemplateLanguage<'a> + ?Sized>(
902902
Ok(out_property.into_dyn_wrapped())
903903
},
904904
);
905+
map.insert(
    // `string.match(pattern)`: yields the part of the string matched by
    // `pattern`, or an empty string when nothing matches.
    "match",
    |_language, _diagnostics, _build_ctx, self_property, function| {
        let [needle_node] = function.expect_exact_arguments()?;
        let needle = template_parser::expect_string_pattern(needle_node)?;
        // Compile the regex once here rather than inside the property
        // closure below, which would otherwise rebuild it on every
        // invocation.
        let regex = needle.to_regex();

        let out_property = self_property.and_then(move |haystack| {
            match regex.find(haystack.as_bytes()) {
                // The match is found on raw bytes, so it is converted back
                // to a string and a conversion failure is propagated.
                Some(m) => Ok(String::from_utf8(m.as_bytes().to_owned())?),
                // FIXME(jade): maybe suboptimal to return string
                // unconditionally but we don't have an Option<String> type
                // yet.
                None => Ok(String::new()),
            }
        });
        Ok(out_property.into_dyn_wrapped())
    },
);
905927
map.insert(
906928
"starts_with",
907929
|language, diagnostics, build_ctx, self_property, function| {
@@ -2804,6 +2826,35 @@ mod tests {
28042826
insta::assert_snapshot!(
28052827
env.render_ok(r#""foo".contains(separate("o", "f", bad_string))"#), @"<Error: Bad>");
28062828

2829+
insta::assert_snapshot!(env.render_ok(r#""fooo".match(regex:'[a-f]o+')"#), @"fooo");
2830+
insta::assert_snapshot!(env.render_ok(r#""fa".match(regex:'[a-f]o+')"#), @"");
2831+
insta::assert_snapshot!(env.render_ok(r#""hello".match(regex:"h(ell)o")"#), @"hello");
2832+
insta::assert_snapshot!(env.render_ok(r#""HEllo".match(regex-i:"h(ell)o")"#), @"HEllo");
2833+
insta::assert_snapshot!(env.render_ok(r#""hEllo".match(glob:"h*o")"#), @"hEllo");
2834+
insta::assert_snapshot!(env.render_ok(r#""Hello".match(glob:"h*o")"#), @"");
2835+
insta::assert_snapshot!(env.render_ok(r#""HEllo".match(glob-i:"h*o")"#), @"HEllo");
2836+
insta::assert_snapshot!(env.render_ok(r#""hello".match("he")"#), @"he");
2837+
insta::assert_snapshot!(env.render_ok(r#""hello".match(substring:"he")"#), @"he");
2838+
insta::assert_snapshot!(env.render_ok(r#""hello".match(exact:"he")"#), @"");
2839+
2840+
// NOTE: this is asserting that we are still parsing as string regex
2841+
// before reparsing as a bytes regex. We will plausibly stop doing that
2842+
// (https://github.com/jj-vcs/jj/pull/6899#discussion_r2214764968).
2843+
// If we stop doing that, this test should instead confirm that a bad
2844+
// regex still generates a good error when the invalid utf-8 is
2845+
// converted to a string.
2846+
insta::assert_snapshot!(env.parse_err(r#""🥺".match(regex:'^(?-u:.).+$')"#), @r#"
2847+
--> 1:11
2848+
|
2849+
1 | "🥺".match(regex:'^(?-u:.).+$')
2850+
| ^-----------------^
2851+
|
2852+
= Bad string pattern: regex parse error:
2853+
^(?-u:.).+$
2854+
^
2855+
error: pattern can match invalid UTF-8
2856+
"#);
2857+
28072858
insta::assert_snapshot!(env.render_ok(r#""".first_line()"#), @"");
28082859
insta::assert_snapshot!(env.render_ok(r#""foo\nbar".first_line()"#), @"foo");
28092860

docs/templates.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,10 @@ defined.
406406
* `.len() -> Integer`: Length in UTF-8 bytes.
407407
* `.contains(needle: Stringify) -> Boolean`: Whether the string contains the
408408
provided stringifiable value as a substring.
409+
* `.match(needle: StringPattern) -> String`: Extracts
410+
the matching part of the string for the given pattern.
411+
412+
An empty string is returned if there is no match.
409413
* `.first_line() -> String`
410414
* `.lines() -> List<String>`: Split into lines excluding newline characters.
411415
* `.upper() -> String`

lib/src/str_util.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ impl GlobPattern {
5959
pub fn as_str(&self) -> &str {
6060
self.glob.glob()
6161
}
62+
63+
/// Converts to a bytes regex.
64+
pub fn to_regex(&self) -> regex::bytes::Regex {
65+
regex::bytes::RegexBuilder::new(self.glob.regex())
66+
.dot_matches_new_line(true)
67+
.build()
68+
.expect("glob regex should be valid")
69+
}
6270
}
6371

6472
impl Debug for GlobPattern {
@@ -283,6 +291,44 @@ impl StringPattern {
283291
}
284292
}
285293

294+
/// Converts the pattern into a bytes regex.
295+
pub fn to_regex(&self) -> regex::bytes::Regex {
296+
match self {
297+
StringPattern::Exact(literal) => {
298+
regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
299+
.build()
300+
.expect("impossible to fail to compile regex of literal")
301+
}
302+
StringPattern::ExactI(literal) => {
303+
regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
304+
.case_insensitive(true)
305+
.build()
306+
.expect("impossible to fail to compile regex of literal")
307+
}
308+
StringPattern::Substring(literal) => {
309+
regex::bytes::RegexBuilder::new(&regex::escape(literal))
310+
.build()
311+
.expect("impossible to fail to compile regex of literal")
312+
}
313+
StringPattern::SubstringI(literal) => {
314+
regex::bytes::RegexBuilder::new(&regex::escape(literal))
315+
.case_insensitive(true)
316+
.build()
317+
.expect("impossible to fail to compile regex of literal")
318+
}
319+
StringPattern::Glob(glob_pattern) => glob_pattern.to_regex(),
320+
// The regex generated represents the case insensitivity itself
321+
StringPattern::GlobI(glob_pattern) => glob_pattern.to_regex(),
322+
StringPattern::Regex(regex) => regex::bytes::RegexBuilder::new(regex.as_str())
323+
.build()
324+
.expect("bytes regexes are a superset of unicode regexes"),
325+
StringPattern::RegexI(regex) => regex::bytes::RegexBuilder::new(regex.as_str())
326+
.case_insensitive(true)
327+
.build()
328+
.expect("bytes regexes are a superset of unicode regexes"),
329+
}
330+
}
331+
286332
/// Iterates entries of the given `map` whose string keys match this
287333
/// pattern.
288334
pub fn filter_btree_map<'a, 'b, K: Borrow<str> + Ord, V>(

0 commit comments

Comments
 (0)