Skip to content

Commit a93bd87

Browse files
committed
templates: support string patterns in template language
This is a basic implementation of the same string pattern system as in the revset language. It's currently only used for `string.matches`, so you can now do:

```
"foo".matches(regex:'[a-f]o+')
```

In the future this could be added to more string functions (and, e.g., the ability to parse things out of strings could be added). CC: #6893
1 parent 61444b1 commit a93bd87

File tree

4 files changed

+144
-5
lines changed

4 files changed

+144
-5
lines changed

cli/src/template.pest

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ string_literal = ${ "\"" ~ (string_content | string_escape)* ~ "\"" }
3333
raw_string_content = @{ (!"'" ~ ANY)* }
3434
raw_string_literal = ${ "'" ~ raw_string_content ~ "'" }
3535

36+
any_string_literal = _{ string_literal | raw_string_literal }
37+
3638
integer_literal = @{
3739
ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*
3840
| "0"
@@ -56,6 +58,7 @@ div_op = { "/" }
5658
rem_op = { "%" }
5759
logical_not_op = { "!" }
5860
negate_op = { "-" }
61+
pattern_kind_op = { ":" }
5962
prefix_ops = _{ logical_not_op | negate_op }
6063
infix_ops = _{
6164
logical_or_op
@@ -88,10 +91,21 @@ formal_parameters = {
8891
| ""
8992
}
9093

94+
// NOTE: string pattern identifiers additionally allow "-" in them, which
95+
// results in some oddness with the `-` operator, though does not yet cause
96+
// ambiguity. This may prove annoying at some future point.
97+
string_pattern_identifier = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_" | "-")* }
98+
string_pattern = {
99+
// Unlike the revset language, we're not allowing bare words here because
100+
// templates are generally not written on-the-fly.
101+
string_pattern_identifier ~ pattern_kind_op ~ any_string_literal
102+
}
103+
91104
primary = _{
92105
("(" ~ template ~ ")")
93106
| function
94107
| lambda
108+
| string_pattern
95109
| identifier
96110
| string_literal
97111
| raw_string_literal

cli/src/template_builder.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1946,6 +1946,10 @@ pub fn build_expression<'a, L: TemplateLanguage<'a> + ?Sized>(
19461946
let property = Literal(value.clone()).into_dyn_wrapped();
19471947
Ok(Expression::unlabeled(property))
19481948
}
1949+
ExpressionKind::StringPattern { .. } => Err(TemplateParseError::expression(
1950+
"String patterns may not be used as expression values",
1951+
node.span,
1952+
)),
19491953
ExpressionKind::Unary(op, arg_node) => {
19501954
let property = build_unary_operation(language, diagnostics, build_ctx, *op, arg_node)?;
19511955
Ok(Expression::unlabeled(property))
@@ -2969,6 +2973,16 @@ mod tests {
29692973
env.render_ok(r#""description 123".contains(description.first_line())"#),
29702974
@"true");
29712975

2976+
// String patterns are not stringifiable
2977+
insta::assert_snapshot!(env.parse_err(r#""fa".starts_with(regex:'[a-f]o+')"#), @r#"
2978+
--> 1:18
2979+
|
2980+
1 | "fa".starts_with(regex:'[a-f]o+')
2981+
| ^-------------^
2982+
|
2983+
= String patterns may not be used as expression values
2984+
"#);
2985+
29722986
// inner template error should propagate
29732987
insta::assert_snapshot!(env.render_ok(r#""foo".contains(bad_string)"#), @"<Error: Bad>");
29742988
insta::assert_snapshot!(

cli/src/template_parser.rs

Lines changed: 99 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use jj_lib::dsl_util::FunctionCallParser;
3333
use jj_lib::dsl_util::InvalidArguments;
3434
use jj_lib::dsl_util::StringLiteralParser;
3535
use jj_lib::dsl_util::collect_similar;
36+
use jj_lib::str_util::StringPattern;
3637
use pest::Parser as _;
3738
use pest::iterators::Pair;
3839
use pest::iterators::Pairs;
@@ -69,8 +70,11 @@ impl Rule {
6970
Self::string_literal => None,
7071
Self::raw_string_content => None,
7172
Self::raw_string_literal => None,
73+
Self::any_string_literal => None,
74+
Self::string_pattern => None,
7275
Self::integer_literal => None,
7376
Self::identifier => None,
77+
Self::string_pattern_identifier => None,
7478
Self::concat_op => Some("++"),
7579
Self::logical_or_op => Some("||"),
7680
Self::logical_and_op => Some("&&"),
@@ -87,6 +91,7 @@ impl Rule {
8791
Self::rem_op => Some("%"),
8892
Self::logical_not_op => Some("!"),
8993
Self::negate_op => Some("-"),
94+
Self::pattern_kind_op => Some(":"),
9095
Self::prefix_ops => None,
9196
Self::infix_ops => None,
9297
Self::function => None,
@@ -285,6 +290,11 @@ pub enum ExpressionKind<'i> {
285290
Boolean(bool),
286291
Integer(i64),
287292
String(String),
293+
/// `<kind>:"<value>"`
294+
StringPattern {
295+
kind: &'i str,
296+
value: String,
297+
},
288298
Unary(UnaryOp, Box<ExpressionNode<'i>>),
289299
Binary(BinaryOp, Box<ExpressionNode<'i>>, Box<ExpressionNode<'i>>),
290300
Concat(Vec<ExpressionNode<'i>>),
@@ -302,7 +312,10 @@ impl<'i> FoldableExpression<'i> for ExpressionKind<'i> {
302312
{
303313
match self {
304314
Self::Identifier(name) => folder.fold_identifier(name, span),
305-
Self::Boolean(_) | Self::Integer(_) | Self::String(_) => Ok(self),
315+
ExpressionKind::Boolean(_)
316+
| ExpressionKind::Integer(_)
317+
| ExpressionKind::String(_)
318+
| ExpressionKind::StringPattern { .. } => Ok(self),
306319
Self::Unary(op, arg) => {
307320
let arg = Box::new(folder.fold_expression(*arg)?);
308321
Ok(Self::Unary(op, arg))
@@ -458,6 +471,12 @@ fn parse_lambda_node(pair: Pair<Rule>) -> TemplateParseResult<LambdaNode> {
458471
})
459472
}
460473

474+
fn parse_raw_string_literal(pair: Pair<Rule>) -> String {
475+
let [content] = pair.into_inner().collect_array().unwrap();
476+
assert_eq!(content.as_rule(), Rule::raw_string_content);
477+
content.as_str().to_owned()
478+
}
479+
461480
fn parse_term_node(pair: Pair<Rule>) -> TemplateParseResult<ExpressionNode> {
462481
assert_eq!(pair.as_rule(), Rule::term);
463482
let mut inner = pair.into_inner();
@@ -469,9 +488,7 @@ fn parse_term_node(pair: Pair<Rule>) -> TemplateParseResult<ExpressionNode> {
469488
ExpressionNode::new(ExpressionKind::String(text), span)
470489
}
471490
Rule::raw_string_literal => {
472-
let [content] = expr.into_inner().collect_array().unwrap();
473-
assert_eq!(content.as_rule(), Rule::raw_string_content);
474-
let text = content.as_str().to_owned();
491+
let text = parse_raw_string_literal(expr);
475492
ExpressionNode::new(ExpressionKind::String(text), span)
476493
}
477494
Rule::integer_literal => {
@@ -480,6 +497,21 @@ fn parse_term_node(pair: Pair<Rule>) -> TemplateParseResult<ExpressionNode> {
480497
})?;
481498
ExpressionNode::new(ExpressionKind::Integer(value), span)
482499
}
500+
Rule::string_pattern => {
501+
let [kind, op, literal] = expr.into_inner().collect_array().unwrap();
502+
assert_eq!(kind.as_rule(), Rule::string_pattern_identifier);
503+
assert_eq!(op.as_rule(), Rule::pattern_kind_op);
504+
let kind = kind.as_str();
505+
let text = match literal.as_rule() {
506+
Rule::string_literal => STRING_LITERAL_PARSER.parse(literal.into_inner()),
507+
Rule::raw_string_literal => parse_raw_string_literal(literal),
508+
other => {
509+
panic!("Unexpected literal rule in string pattern: {other:?}")
510+
}
511+
};
512+
// The actual parsing and construction of the pattern is deferred to later.
513+
ExpressionNode::new(ExpressionKind::StringPattern { kind, value: text }, span)
514+
}
483515
Rule::identifier => ExpressionNode::new(parse_identifier_or_literal(expr), span),
484516
Rule::function => {
485517
let function = Box::new(FUNCTION_CALL_PARSER.parse(
@@ -663,6 +695,23 @@ pub fn expect_string_literal<'a>(node: &'a ExpressionNode<'_>) -> TemplateParseR
663695
})
664696
}
665697

698+
/// Unwraps inner value if the given `node` is a string pattern or string literal.
699+
///
700+
/// This forces it to be static so that it need not be part of the type system.
701+
pub fn expect_string_pattern(node: &'_ ExpressionNode<'_>) -> TemplateParseResult<StringPattern> {
702+
catch_aliases_no_diagnostics(node, |node| match &node.kind {
703+
ExpressionKind::StringPattern { kind, value } => StringPattern::from_str_kind(value, kind)
704+
.map_err(|err| {
705+
TemplateParseError::expression("Bad string pattern", node.span).with_source(err)
706+
}),
707+
ExpressionKind::String(string) => Ok(StringPattern::Substring(string.clone())),
708+
_ => Err(TemplateParseError::expression(
709+
"Expected string pattern",
710+
node.span,
711+
)),
712+
})
713+
}
714+
666715
/// Unwraps inner node if the given `node` is a lambda.
667716
pub fn expect_lambda<'a, 'i>(
668717
node: &'a ExpressionNode<'i>,
@@ -835,6 +884,7 @@ mod tests {
835884
| ExpressionKind::Boolean(_)
836885
| ExpressionKind::Integer(_)
837886
| ExpressionKind::String(_) => node.kind,
887+
ExpressionKind::StringPattern { .. } => node.kind,
838888
ExpressionKind::Unary(op, arg) => {
839889
let arg = Box::new(normalize_tree(*arg));
840890
ExpressionKind::Unary(op, arg)
@@ -1139,6 +1189,51 @@ mod tests {
11391189
);
11401190
}
11411191

1192+
#[test]
1193+
fn test_string_pattern() {
1194+
assert_eq!(
1195+
parse_into_kind(r#"regex:"meow""#),
1196+
Ok(ExpressionKind::StringPattern {
1197+
kind: "regex",
1198+
value: "meow".to_owned()
1199+
}),
1200+
);
1201+
assert_eq!(
1202+
parse_into_kind(r#"regex:'\r\n'"#),
1203+
Ok(ExpressionKind::StringPattern {
1204+
kind: "regex",
1205+
value: r#"\r\n"#.to_owned()
1206+
})
1207+
);
1208+
assert_eq!(
1209+
parse_into_kind(r#"regex-i:'\r\n'"#),
1210+
Ok(ExpressionKind::StringPattern {
1211+
kind: "regex-i",
1212+
value: r#"\r\n"#.to_owned()
1213+
})
1214+
);
1215+
assert_eq!(
1216+
parse_into_kind("regex:meow"),
1217+
Err(TemplateParseErrorKind::SyntaxError),
1218+
"no bare words in string patterns in templates"
1219+
);
1220+
assert_eq!(
1221+
parse_into_kind("regex: 'with spaces'"),
1222+
Err(TemplateParseErrorKind::SyntaxError),
1223+
"no spaces after"
1224+
);
1225+
assert_eq!(
1226+
parse_into_kind("regex :'with spaces'"),
1227+
Err(TemplateParseErrorKind::SyntaxError),
1228+
"no spaces before either"
1229+
);
1230+
assert_eq!(
1231+
parse_into_kind("regex : 'with spaces'"),
1232+
Err(TemplateParseErrorKind::SyntaxError),
1233+
"certainly not both"
1234+
);
1235+
}
1236+
11421237
#[test]
11431238
fn test_integer_literal() {
11441239
assert_eq!(parse_into_kind("0"), Ok(ExpressionKind::Integer(0)));

docs/templates.md

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,8 @@ A string can be implicitly converted to `Boolean`. The following methods are
424424
defined.
425425

426426
* `.len() -> Integer`: Length in UTF-8 bytes.
427-
* `.contains(needle: Stringify) -> Boolean`
427+
* `.contains(needle: Stringify) -> Boolean`: Whether the string contains the
428+
provided stringifiable value as a substring.
428429
* `.first_line() -> String`
429430
* `.lines() -> List<String>`: Split into lines excluding newline characters.
430431
* `.upper() -> String`
@@ -474,6 +475,21 @@ An expression that can be converted to a `String`.
474475
Any types that can be converted to `Template` can also be `Stringify`. Unlike
475476
`Template`, color labels are stripped.
476477

478+
### `StringPattern` type
479+
480+
_Conversion: `Boolean`: no, `Serialize`: no, `Template`: no_
481+
482+
These are exactly the same as the [String pattern type] in revsets, except that
483+
quotes are mandatory.
484+
485+
Plain string literals may also be used; they are interpreted as case-sensitive
486+
substring patterns.
487+
488+
Currently, `StringPattern` values cannot be stored or passed around; they may
489+
only appear directly at the call site where they are used.
490+
491+
[String pattern type]: revsets.md#string-patterns
492+
477493
### `Template` type
478494

479495
_Conversion: `Boolean`: no, `Serialize`: no, `Template`: yes_

0 commit comments

Comments
 (0)