Skip to content

Commit 1a1e6ed

Browse files
committed
templates: add string.match function
This allows for any matcher type and allows extracting a capture group by number.
1 parent ea91225 commit 1a1e6ed

File tree

4 files changed

+108
-0
lines changed

4 files changed

+108
-0
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
104104
output using template expressions, similar to `jj op log`. Also added
105105
`--no-op-diff` flag to suppress the operation diff.
106106

107+
* The same string pattern system used by revsets is now supported in the
108+
template language, and is exposed as `string.match(pattern)`.
109+
107110
* `jj squash` has gained `--insert-before`, `--insert-after`, and `--destination`
108111
options.
109112

cli/src/template_builder.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,27 @@ fn builtin_string_methods<'a, L: TemplateLanguage<'a> + ?Sized>()
909909
Ok(out_property.into_dyn_wrapped())
910910
},
911911
);
912+
map.insert(
    "match",
    // `string.match(pattern)`: yields the part of the string matched by
    // `pattern`, or an empty string when nothing matches. Rendering fails
    // if the matched bytes are not valid UTF-8.
    |_language, _diagnostics, _build_ctx, self_property, function| {
        let [needle_node] = function.expect_exact_arguments()?;
        let needle = template_parser::expect_string_pattern(needle_node)?;
        // Build the regex once here rather than inside the property
        // closure, so it isn't recompiled (or re-cloned) on every call.
        let regex = needle.to_regex();

        let out_property = self_property.and_then(move |haystack| {
            match regex.find(haystack.as_bytes()) {
                Some(m) => Ok(std::str::from_utf8(m.as_bytes())?.to_owned()),
                // We don't have optional strings, so empty string is the
                // right null value.
                None => Ok(String::new()),
            }
        });
        Ok(out_property.into_dyn_wrapped())
    },
);
912933
map.insert(
913934
"starts_with",
914935
|language, diagnostics, build_ctx, self_property, function| {
@@ -2987,6 +3008,32 @@ mod tests {
29873008
insta::assert_snapshot!(
29883009
env.render_ok(r#""foo".contains(separate("o", "f", bad_string))"#), @"<Error: Bad>");
29893010

3011+
insta::assert_snapshot!(env.render_ok(r#""fooo".match(regex:'[a-f]o+')"#), @"fooo");
3012+
insta::assert_snapshot!(env.render_ok(r#""fa".match(regex:'[a-f]o+')"#), @"");
3013+
insta::assert_snapshot!(env.render_ok(r#""hello".match(regex:"h(ell)o")"#), @"hello");
3014+
insta::assert_snapshot!(env.render_ok(r#""HEllo".match(regex-i:"h(ell)o")"#), @"HEllo");
3015+
insta::assert_snapshot!(env.render_ok(r#""hEllo".match(glob:"h*o")"#), @"hEllo");
3016+
insta::assert_snapshot!(env.render_ok(r#""Hello".match(glob:"h*o")"#), @"");
3017+
insta::assert_snapshot!(env.render_ok(r#""HEllo".match(glob-i:"h*o")"#), @"HEllo");
3018+
insta::assert_snapshot!(env.render_ok(r#""hello".match("he")"#), @"he");
3019+
insta::assert_snapshot!(env.render_ok(r#""hello".match(substring:"he")"#), @"he");
3020+
insta::assert_snapshot!(env.render_ok(r#""hello".match(exact:"he")"#), @"");
3021+
3022+
// Evil regexes can cause invalid UTF-8 output, which nothing can
3023+
// really be done about given we're matching against non-UTF-8 stuff a
3024+
// lot as well.
3025+
insta::assert_snapshot!(env.render_ok(r#""🥺".match(regex:'(?-u)^(?:.)')"#), @"<Error: incomplete utf-8 byte sequence from index 0>");
3026+
3027+
insta::assert_snapshot!(env.parse_err(r#""🥺".match(not-a-pattern:"abc")"#), @r#"
3028+
--> 1:11
3029+
|
3030+
1 | "🥺".match(not-a-pattern:"abc")
3031+
| ^-----------------^
3032+
|
3033+
= Bad string pattern
3034+
Invalid string pattern kind `not-a-pattern:`
3035+
"#);
3036+
29903037
insta::assert_snapshot!(env.render_ok(r#""".first_line()"#), @"");
29913038
insta::assert_snapshot!(env.render_ok(r#""foo\nbar".first_line()"#), @"foo");
29923039

docs/templates.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,10 @@ defined.
426426
* `.len() -> Integer`: Length in UTF-8 bytes.
427427
* `.contains(needle: Stringify) -> Boolean`: Whether the string contains the
428428
provided stringifiable value as a substring.
429+
* `.match(needle: StringPattern) -> String`: Extracts
430+
the matching part of the string for the given pattern.
431+
432+
An empty string is returned if there is no match.
429433
* `.first_line() -> String`
430434
* `.lines() -> List<String>`: Split into lines excluding newline characters.
431435
* `.upper() -> String`

lib/src/str_util.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ impl GlobPattern {
6060
pub fn as_str(&self) -> &str {
6161
self.glob.glob()
6262
}
63+
64+
/// Returns a clone of the bytes regex held by this glob pattern.
65+
pub fn to_regex(&self) -> regex::bytes::Regex {
66+
self.regex.clone()
67+
}
6368
}
6469

6570
impl Debug for GlobPattern {
@@ -289,6 +294,35 @@ impl StringPattern {
289294
}
290295
}
291296

297+
/// Converts the pattern into a bytes regex.
298+
pub fn to_regex(&self) -> regex::bytes::Regex {
299+
match self {
300+
Self::Exact(literal) => {
301+
regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
302+
.build()
303+
.expect("impossible to fail to compile regex of literal")
304+
}
305+
Self::ExactI(literal) => {
306+
regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
307+
.case_insensitive(true)
308+
.build()
309+
.expect("impossible to fail to compile regex of literal")
310+
}
311+
Self::Substring(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
312+
.build()
313+
.expect("impossible to fail to compile regex of literal"),
314+
Self::SubstringI(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
315+
.case_insensitive(true)
316+
.build()
317+
.expect("impossible to fail to compile regex of literal"),
318+
Self::Glob(glob_pattern) => glob_pattern.to_regex(),
319+
// The regex generated represents the case insensitivity itself
320+
Self::GlobI(glob_pattern) => glob_pattern.to_regex(),
321+
Self::Regex(regex) => regex.clone(),
322+
Self::RegexI(regex) => regex.clone(),
323+
}
324+
}
325+
292326
/// Iterates entries of the given `map` whose string keys match this
293327
/// pattern.
294328
pub fn filter_btree_map<'a, 'b, K: Borrow<str> + Ord, V>(
@@ -487,4 +521,24 @@ mod tests {
487521
.is_match("\u{c0}")
488522
);
489523
}
524+
525+
#[test]
fn test_string_pattern_to_regex() {
    // Converts `pattern` to a regex and reports whether it matches `input`.
    fn matches(pattern: StringPattern, input: &str) -> bool {
        pattern.to_regex().is_match(input.as_bytes())
    }

    // Exact literals must cover the whole input.
    assert!(matches(StringPattern::exact("a"), "a"));
    assert!(!matches(StringPattern::exact("a"), "A"));
    assert!(!matches(StringPattern::exact("a"), "aa"));
    assert!(matches(StringPattern::exact_i("a"), "A"));

    // Substring literals may match anywhere in the input.
    assert!(matches(StringPattern::substring("a"), "abc"));
    assert!(!matches(StringPattern::substring("a"), "Abc"));
    assert!(matches(StringPattern::substring_i("a"), "Abc"));

    // Globs.
    assert!(!matches(StringPattern::glob("a").unwrap(), "A"));
    assert!(matches(StringPattern::glob_i("a").unwrap(), "A"));

    // Regexes.
    assert!(matches(StringPattern::regex("^a{1,3}").unwrap(), "abcde"));
    assert!(!matches(StringPattern::regex("^a{1,3}").unwrap(), "Abcde"));
    assert!(matches(StringPattern::regex_i("^a{1,3}").unwrap(), "Abcde"));
}
490544
}

0 commit comments

Comments
 (0)