Skip to content

Commit

Permalink
fix: better downstream and identifier handling (#3757)
Browse files — browse the repository at this point in the history
  • Loading branch information
tobymao authored and izeigerman committed Jan 31, 2025
1 parent ad66273 commit 679c0fd
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 21 deletions.
50 changes: 29 additions & 21 deletions sqlmesh/core/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.dialects.dialect import DialectType
from sqlglot.tokens import Token, TokenType, Tokenizer as BaseTokenizer
from sqlglot.dialects.dialect import Dialect, DialectType
from sqlglot.helper import seq_get

from sqlmesh.core.dialect import normalize_model_name
Expand Down Expand Up @@ -230,20 +230,25 @@ def evaluate(node: exp.Expression) -> t.Set[str]:
return evaluate(node)


class SelectorTokenizer(Tokenizer):
SINGLE_TOKENS = {
"(": TokenType.L_PAREN,
")": TokenType.R_PAREN,
"&": TokenType.AMP,
"|": TokenType.PIPE,
"^": TokenType.CARET,
"+": TokenType.PLUS,
"*": TokenType.STAR,
":": TokenType.COLON,
}
class SelectorDialect(Dialect):
IDENTIFIERS_CAN_START_WITH_DIGIT = True

KEYWORDS = {}
IDENTIFIERS: t.List[str | t.Tuple[str, str]] = []
class Tokenizer(BaseTokenizer):
SINGLE_TOKENS = {
"(": TokenType.L_PAREN,
")": TokenType.R_PAREN,
"&": TokenType.AMP,
"|": TokenType.PIPE,
"^": TokenType.CARET,
"+": TokenType.PLUS,
"*": TokenType.STAR,
":": TokenType.COLON,
}

KEYWORDS = {}
IDENTIFIERS = ["\\"] # there are no identifiers but need to put something here
IDENTIFIER_START = ""
IDENTIFIER_END = ""


class Git(exp.Expression):
Expand All @@ -259,7 +264,7 @@ class Direction(exp.Expression):


def parse(selector: str, dialect: DialectType = None) -> exp.Expression:
tokens = SelectorTokenizer().tokenize(selector)
tokens = SelectorDialect().tokenize(selector)
i = 0

def _curr() -> t.Optional[Token]:
Expand Down Expand Up @@ -304,29 +309,32 @@ def _parse_kind(kind: str) -> bool:

def _parse_var() -> exp.Expression:
upstream = _match(TokenType.PLUS)
downstream = None
tag = _parse_kind("tag")
git = False if tag else _parse_kind("git")
lstar = "*" if _match(TokenType.STAR) else ""
directions = {}

if _match(TokenType.VAR):
if _match(TokenType.VAR) or _match(TokenType.NUMBER):
name = _prev().text
rstar = "*" if _match(TokenType.STAR) else ""
downstream = _match(TokenType.PLUS)
this: exp.Expression = exp.Var(this=f"{lstar}{name}{rstar}")

if upstream:
directions["up"] = True
if downstream:
directions["down"] = True
elif _match(TokenType.L_PAREN):
this = exp.Paren(this=_parse_conjunction())
downstream = _match(TokenType.PLUS)
_match(TokenType.R_PAREN, True)
elif lstar:
this = exp.var("*")
else:
raise ParseError(_error("Expected model name."))

if upstream:
directions["up"] = True
if downstream:
directions["down"] = True

if tag:
this = Tag(this=this)
if git:
Expand Down
32 changes: 32 additions & 0 deletions tests/core/test_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,38 @@ def test_select_models_missing_env(mocker: MockerFixture, make_snapshot):
["model* & ^(tag:tag1 | tag:tag2)"],
{'"model3"'},
),
(
[
("model1", "tag1", None),
("model2", "tag2", {"model1"}),
("model3", "tag3", {"model1"}),
],
["(model1*)+"],
{'"model1"', '"model2"', '"model3"'},
),
(
[
("model1", "tag1", None),
("model2", "tag2", {"model1"}),
("model3", "tag3", {"model2"}),
],
["+(+model2*+)+"],
{'"model1"', '"model2"', '"model3"'},
),
(
[
("model1", "tag1", None),
("model2", "tag2", {"model1"}),
("model3", "tag3", {"model1"}),
],
["(model* & ^*1)+"],
{'"model2"', '"model3"'},
),
(
[("model2", "tag1", None), ("model2_1", "tag2", None), ("model2_2", "tag3", None)],
["*2_*"],
{'"model2_1"', '"model2_2"'},
),
],
)
def test_expand_model_selections(
Expand Down

0 comments on commit 679c0fd

Please sign in to comment.