Skip to content

Commit 583edf9

Browse files
committed
templates: add string.match function
This allows for any matcher type and allows extracting a capture group by number.
1 parent a93bd87 commit 583edf9

File tree

4 files changed

+106
-0
lines changed

4 files changed

+106
-0
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
109109
output using template expressions, similar to `jj op log`. Also added
110110
`--no-op-diff` flag to suppress the operation diff.
111111

112+
* The template language now supports a string pattern system nearly identical
113+
to the one used by revsets, exposed as `string.match(pattern)`.
114+
112115
### Fixed bugs
113116

114117
* `jj git clone` now correctly fetches all tags, unless `--fetch-tags` is

cli/src/template_builder.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,25 @@ fn builtin_string_methods<'a, L: TemplateLanguage<'a> + ?Sized>()
909909
Ok(out_property.into_dyn_wrapped())
910910
},
911911
);
912+
// `match(pattern)`: yields the leftmost part of the string matched by the
// given string pattern, or an empty string when nothing matches.
map.insert(
913+
"match",
914+
|_language, _diagnostics, _build_ctx, self_property, function| {
915+
let [needle_node] = function.expect_exact_arguments()?;
916+
let needle = template_parser::expect_string_pattern(needle_node)?;
917+
// Convert the pattern to a regex once here, outside the per-value
// closure below, which takes ownership of it.
let regex = needle.to_regex();
918+
919+
let out_property = self_property.and_then(move |haystack| {
920+
if let Some(m) = regex.find(haystack.as_bytes()) {
921+
// The match is a byte slice; if it is not valid UTF-8 the
// conversion error is propagated by `?`.
Ok(std::str::from_utf8(m.as_bytes())?.to_owned())
922+
} else {
923+
// There is no optional string type here, so an empty string
924+
// stands in for "no match".
Ok(String::new())
926+
}
927+
});
928+
Ok(out_property.into_dyn_wrapped())
929+
},
930+
);
912931
map.insert(
913932
"starts_with",
914933
|language, diagnostics, build_ctx, self_property, function| {
@@ -2990,6 +3009,32 @@ mod tests {
29903009
insta::assert_snapshot!(
29913010
env.render_ok(r#""foo".contains(separate("o", "f", bad_string))"#), @"<Error: Bad>");
29923011

3012+
insta::assert_snapshot!(env.render_ok(r#""fooo".match(regex:'[a-f]o+')"#), @"fooo");
3013+
insta::assert_snapshot!(env.render_ok(r#""fa".match(regex:'[a-f]o+')"#), @"");
3014+
insta::assert_snapshot!(env.render_ok(r#""hello".match(regex:"h(ell)o")"#), @"hello");
3015+
insta::assert_snapshot!(env.render_ok(r#""HEllo".match(regex-i:"h(ell)o")"#), @"HEllo");
3016+
insta::assert_snapshot!(env.render_ok(r#""hEllo".match(glob:"h*o")"#), @"hEllo");
3017+
insta::assert_snapshot!(env.render_ok(r#""Hello".match(glob:"h*o")"#), @"");
3018+
insta::assert_snapshot!(env.render_ok(r#""HEllo".match(glob-i:"h*o")"#), @"HEllo");
3019+
insta::assert_snapshot!(env.render_ok(r#""hello".match("he")"#), @"he");
3020+
insta::assert_snapshot!(env.render_ok(r#""hello".match(substring:"he")"#), @"he");
3021+
insta::assert_snapshot!(env.render_ok(r#""hello".match(exact:"he")"#), @"");
3022+
3023+
// Evil regexes can cause invalid UTF-8 output, which nothing can
3024+
// really be done about given we're matching against non-UTF-8 stuff a
3025+
// lot as well.
3026+
insta::assert_snapshot!(env.render_ok(r#""🥺".match(regex:'(?-u)^(?:.)')"#), @"<Error: incomplete utf-8 byte sequence from index 0>");
3027+
3028+
insta::assert_snapshot!(env.parse_err(r#""🥺".match(not-a-pattern:"abc")"#), @r#"
3029+
--> 1:11
3030+
|
3031+
1 | "🥺".match(not-a-pattern:"abc")
3032+
| ^-----------------^
3033+
|
3034+
= Bad string pattern
3035+
Invalid string pattern kind `not-a-pattern:`
3036+
"#);
3037+
29933038
insta::assert_snapshot!(env.render_ok(r#""".first_line()"#), @"");
29943039
insta::assert_snapshot!(env.render_ok(r#""foo\nbar".first_line()"#), @"foo");
29953040

docs/templates.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,10 @@ defined.
426426
* `.len() -> Integer`: Length in UTF-8 bytes.
427427
* `.contains(needle: Stringify) -> Boolean`: Whether the string contains the
428428
provided stringifiable value as a substring.
429+
* `.match(needle: StringPattern) -> String`: Extracts
430+
the first matching part of the string for the given pattern.
431+
432+
An empty string is returned if there is no match.
429433
* `.first_line() -> String`
430434
* `.lines() -> List<String>`: Split into lines excluding newline characters.
431435
* `.upper() -> String`

lib/src/str_util.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ impl GlobPattern {
6060
pub fn as_str(&self) -> &str {
6161
self.glob.glob()
6262
}
63+
64+
/// Returns this glob pattern as a bytes regex.
///
/// The result is a clone of the regex stored in `self.regex`.
65+
pub fn to_regex(&self) -> regex::bytes::Regex {
66+
self.regex.clone()
67+
}
6368
}
6469

6570
impl Debug for GlobPattern {
@@ -289,6 +294,35 @@ impl StringPattern {
289294
}
290295
}
291296

297+
/// Converts the pattern into a bytes regex.
///
/// * `Exact` / `ExactI`: the literal is escaped and wrapped in `^…$`;
///   `ExactI` additionally builds the regex case-insensitively.
/// * `Substring` / `SubstringI`: the literal is escaped and left
///   unanchored; `SubstringI` additionally builds the regex
///   case-insensitively.
/// * `Glob` / `GlobI`: delegates to the glob pattern's own `to_regex()`.
/// * `Regex` / `RegexI`: returns a clone of the stored regex.
///
/// # Panics
///
/// The literal variants call `expect` on the build result; the code treats
/// a build failure for an escaped literal as impossible.
298+
pub fn to_regex(&self) -> regex::bytes::Regex {
299+
match self {
300+
Self::Exact(literal) => {
301+
regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
302+
.build()
303+
.expect("impossible to fail to compile regex of literal")
304+
}
305+
Self::ExactI(literal) => {
306+
regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
307+
.case_insensitive(true)
308+
.build()
309+
.expect("impossible to fail to compile regex of literal")
310+
}
311+
Self::Substring(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
312+
.build()
313+
.expect("impossible to fail to compile regex of literal"),
314+
Self::SubstringI(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
315+
.case_insensitive(true)
316+
.build()
317+
.expect("impossible to fail to compile regex of literal"),
318+
Self::Glob(glob_pattern) => glob_pattern.to_regex(),
319+
// No extra flag is applied here: the regex generated for the glob
// already represents the case insensitivity itself.
320+
Self::GlobI(glob_pattern) => glob_pattern.to_regex(),
321+
Self::Regex(regex) => regex.clone(),
322+
// NOTE(review): cloned as-is, so the stored regex is presumably
// already case-insensitive — confirm against the constructor.
Self::RegexI(regex) => regex.clone(),
323+
}
324+
}
325+
292326
/// Iterates entries of the given `map` whose string keys match this
293327
/// pattern.
294328
pub fn filter_btree_map<'a, 'b, K: Borrow<str> + Ord, V>(
@@ -487,4 +521,24 @@ mod tests {
487521
.is_match("\u{c0}")
488522
);
489523
}
524+
525+
#[test]
526+
fn test_string_pattern_to_regex() {
527+
// `check` converts the pattern with `to_regex()` and reports whether the
// resulting regex matches the bytes of `match_to`.
let check = |pattern: StringPattern, match_to: &str| {
528+
let regex = pattern.to_regex();
529+
regex.is_match(match_to.as_bytes())
530+
};
531+
// Exact patterns: same-case match succeeds, different case or a longer
// input does not; the `_i` form ignores case.
assert!(check(StringPattern::exact("a"), "a"));
532+
assert!(!check(StringPattern::exact("a"), "A"));
533+
assert!(!check(StringPattern::exact("a"), "aa"));
534+
assert!(check(StringPattern::exact_i("a"), "A"));
535+
// Substring patterns: match inside a longer input, case-sensitively
// unless the `_i` form is used.
assert!(check(StringPattern::substring("a"), "abc"));
536+
assert!(!check(StringPattern::substring("a"), "Abc"));
537+
assert!(check(StringPattern::substring_i("a"), "Abc"));
538+
// Glob patterns: case-sensitive unless the `_i` form is used.
assert!(!check(StringPattern::glob("a").unwrap(), "A"));
539+
assert!(check(StringPattern::glob_i("a").unwrap(), "A"));
540+
// Regex patterns: case-sensitive unless the `_i` form is used.
assert!(check(StringPattern::regex("^a{1,3}").unwrap(), "abcde"));
541+
assert!(!check(StringPattern::regex("^a{1,3}").unwrap(), "Abcde"));
542+
assert!(check(StringPattern::regex_i("^a{1,3}").unwrap(), "Abcde"));
543+
}
490544
}

0 commit comments

Comments
 (0)