Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions whelk-core/src/main/groovy/whelk/search2/parse/Ast.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@ public Ast(Parse.OrComb orComb) throws InvalidQueryException {
}

public static Node buildFrom(Parse.OrComb orComb) throws InvalidQueryException {
Node ast = reduce(orComb);
Analysis.checkSemantics(ast);
return ast;
return reduce(orComb);
}

private static Node reduce(Parse.OrComb orComb) throws InvalidQueryException {
Expand Down
3 changes: 3 additions & 0 deletions whelk-core/src/main/groovy/whelk/search2/parse/Lex.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ public enum TokenName {
}

/**
 * A lexed symbol: its token kind, its text and its offset.
 */
public record Symbol(TokenName name, String value, int offset) {
    /**
     * Tells whether this symbol was lexed as a quoted string.
     *
     * @return {@code true} when the token kind is {@link TokenName#QUOTED_STRING}
     */
    public boolean isQuoted() {
        // Enum constants are singletons, so identity comparison is equivalent to equals().
        return name == TokenName.QUOTED_STRING;
    }
}

public static LinkedList<Symbol> lexQuery(String queryString) throws InvalidQueryException {
Expand Down
163 changes: 136 additions & 27 deletions whelk-core/src/main/groovy/whelk/search2/querytree/QueryTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;

public class QueryTreeBuilder {
public static Node buildTree(String queryString, Disambiguate disambiguate) throws InvalidQueryException {
Expand All @@ -20,20 +21,17 @@ public static Node buildTree(String queryString, Disambiguate disambiguate) thro
} else if (queryString.equals(Operator.WILDCARD)) {
return new Any.Wildcard();
}
return buildTree(getAst(queryString).tree, disambiguate, null, null);
return buildTree(getAst(queryString).tree, disambiguate, null, null, queryString);
}

private static Node buildTree(Ast.Node astNode, Disambiguate disambiguate, Selector selector, Operator operator) throws InvalidQueryException {
private static Node buildTree(Ast.Node astNode, Disambiguate disambiguate, MappedCode mc, Operator operator, String q) throws InvalidQueryException {
return switch (astNode) {
case Ast.Group g -> buildFromGroup(g, disambiguate, selector, operator);
case Ast.Not n -> buildFromNot(n, disambiguate, selector, operator);
case Ast.Leaf l -> buildFromLeaf(l, disambiguate, selector, operator);
case Ast.Code c -> {
if (selector != null) {
throw new InvalidQueryException("Codes within code groups are not allowed.");
}
yield buildFromCode(c, disambiguate);
}
case Ast.Group g -> buildFromGroup(g, disambiguate, mc, operator, q);
case Ast.Not n -> buildFromNot(n, disambiguate, mc, operator, q);
case Ast.Leaf l -> buildFromLeaf(l, disambiguate, mc, operator);
case Ast.Code c -> mc != null
? asFreeText(mc.astCode(), q, disambiguate.getTextQueryProperty()) // Codes within code groups are not allowed, treat the whole code segment as free text
: buildFromCode(c, disambiguate, q);
};
}

Expand All @@ -43,10 +41,10 @@ private static Ast getAst(String queryString) throws InvalidQueryException {
return new Ast(parseTree);
}

private static Node buildFromGroup(Ast.Group group, Disambiguate disambiguate, Selector selector, Operator operator) throws InvalidQueryException {
private static Node buildFromGroup(Ast.Group group, Disambiguate disambiguate, MappedCode mc, Operator operator, String q) throws InvalidQueryException {
if (group.operands().isEmpty()) {
return selector != null
? new Condition(selector, operator, new Any.EmptyGroup())
return mc != null
? new Condition(mc.selector(), operator, new Any.EmptyGroup())
: new Any.EmptyGroup();
}

Expand All @@ -57,17 +55,34 @@ private static Node buildFromGroup(Ast.Group group, Disambiguate disambiguate, S
for (int i = 0; i < group.operands().size(); i++) {
Ast.Node o = group.operands().get(i);
if (o instanceof Ast.Leaf leaf) {
Node node = buildFromLeaf(leaf, disambiguate, selector, operator);
Node node = buildFromLeaf(leaf, disambiguate, mc, operator);
switch (node) {
case FreeText ft -> freeTextTokens.add(ft.tokens().getFirst());
case Condition c when c.value() instanceof FreeText ft -> freeTextTokens.add(ft.tokens().getFirst());
default -> children.add(node);
}
if (!freeTextTokens.isEmpty() && freeTextStartIdx == -1) {
freeTextStartIdx = i;
}
/*
* Normally, only Ast.Leaf nodes produce FreeText. However, Ast.Code may also
* yield FreeText when the key is invalid, so we must handle those cases here
* as well when merging free-text tokens.
*/
else if (o instanceof Ast.Code c) {
if (mc != null) {
// Codes within code groups are not allowed, return the whole code group as free text
return asFreeText(mc.astCode(), q, disambiguate.getTextQueryProperty());
}
Node node = buildFromCode(c, disambiguate, q);
if (node instanceof FreeText ft) {
freeTextTokens.add(ft.tokens().getFirst());
} else {
children.add(node);
}
} else {
children.add(buildTree(o, disambiguate, selector, operator));
children.add(buildTree(o, disambiguate, mc, operator, q));
}
if (!freeTextTokens.isEmpty() && freeTextStartIdx == -1) {
freeTextStartIdx = i;
}
}

Expand All @@ -78,7 +93,7 @@ private static Node buildFromGroup(Ast.Group group, Disambiguate disambiguate, S
};

FreeText freeText = new FreeText(disambiguate.getTextQueryProperty(), freeTextTokens, connective);
Node node = selector != null ? new Condition(selector, operator, freeText) : freeText;
Node node = mc != null ? new Condition(mc.selector(), operator, freeText) : freeText;

if (children.isEmpty()) {
return node;
Expand All @@ -93,13 +108,13 @@ private static Node buildFromGroup(Ast.Group group, Disambiguate disambiguate, S
};
}

private static Node buildFromNot(Ast.Not not, Disambiguate disambiguate, Selector selector, Operator operator) throws InvalidQueryException {
return buildTree(not.operand(), disambiguate, selector, operator).getInverse();
private static Node buildFromNot(Ast.Not not, Disambiguate disambiguate, MappedCode mc, Operator operator, String q) throws InvalidQueryException {
return buildTree(not.operand(), disambiguate, mc, operator, q).getInverse();
}

private static Node buildFromLeaf(Ast.Leaf leaf, Disambiguate disambiguate, Selector selector, Operator operator) throws InvalidQueryException {
if (selector != null) {
return buildCondition(selector, operator, leaf, disambiguate);
private static Node buildFromLeaf(Ast.Leaf leaf, Disambiguate disambiguate, MappedCode mc, Operator operator) throws InvalidQueryException {
if (mc != null) {
return buildCondition(mc.selector(), operator, leaf, disambiguate);
}

Lex.Symbol symbol = leaf.value();
Expand All @@ -114,9 +129,11 @@ private static Node buildFromLeaf(Ast.Leaf leaf, Disambiguate disambiguate, Sele
return new FreeText(disambiguate.getTextQueryProperty(), getToken(symbol));
}

private static Node buildFromCode(Ast.Code c, Disambiguate disambiguate) throws InvalidQueryException {
Selector selector = disambiguate.mapQueryKey(getToken(c.code()));
return buildTree(c.operand(), disambiguate, selector, c.operator());
private static Node buildFromCode(Ast.Code c, Disambiguate disambiguate, String q) throws InvalidQueryException {
MappedCode mc = MappedCode.from(c, disambiguate);
return mc.selector().isValid()
? buildTree(c.operand(), disambiguate, mc, c.operator(), q)
: asFreeText(c, q, disambiguate.getTextQueryProperty()); // If the selector isn't valid, treat the whole segment as free text.
}

private static Condition buildCondition(Selector selector, Operator operator, Ast.Leaf leaf, Disambiguate disambiguate) {
Expand All @@ -129,9 +146,101 @@ private static Condition buildCondition(Selector selector, Operator operator, As
return condition.isTypeNode() ? condition.asTypeNode() : condition;
}

/**
 * Pairs an AST code node with the selector that its key was mapped to.
 */
private record MappedCode(Ast.Code astCode, Selector selector) {
    /**
     * Maps the key of {@code c} through {@code disambiguate} and bundles the result with the node.
     */
    static MappedCode from(Ast.Code c, Disambiguate disambiguate) {
        Token key = getToken(c.code());
        Selector mapped = disambiguate.mapQueryKey(key);
        return new MappedCode(c, mapped);
    }
}

/**
 * Converts a lexer symbol into a query-tree token.
 *
 * @param symbol the lexed symbol
 * @return a {@code Token.Quoted} for quoted strings, otherwise a {@code Token.Raw}
 */
private static Token getToken(Lex.Symbol symbol) {
    if (symbol.name() != Lex.TokenName.QUOTED_STRING) {
        return new Token.Raw(symbol.value(), symbol.offset());
    }
    // The quoted token starts one position after the symbol's offset
    // (presumably to skip the opening quote character -- confirm against Lex).
    return new Token.Quoted(symbol.value(), symbol.offset() + 1);
}

/**
 * Turns a whole code segment (key, operator and operand) into a single raw free-text token,
 * cut verbatim out of the original query string.
 *
 * @param c         the code node whose segment should be treated as free text
 * @param q         the original query string
 * @param textQuery the property to attach the free text to
 * @return free text covering the segment from the key's offset up to the computed segment end
 */
private static FreeText asFreeText(Ast.Code c, String q, Property.TextQuery textQuery) {
    int start = c.code().offset();
    // The segment ends where the rightmost symbol of the code's operand ends,
    // extended over any closing brackets / up to the next whitespace.
    int end = findSegmentEndIdx(q, findRightmostSymbol(c, null, 0));
    Token.Raw segment = new Token.Raw(q.substring(start, end), start);
    return new FreeText(textQuery, segment);
}

/**
 * A lexer symbol together with a nesting level. When produced by findRightmostSymbol,
 * the level is increased by one for every Ast.Group that was descended into.
 */
private record SymbolPosition(Lex.Symbol symbol, int nestedLevel) {}

/**
 * Walks down the last operand of each node until a leaf (or an empty group) is reached and
 * reports the symbol found there together with the number of groups entered on the way.
 *
 * @param n                the node to start from
 * @param currentRightmost the symbol to report if an empty group is reached before any leaf
 * @param currentLevel     the nesting level at which {@code n} sits
 * @return the rightmost symbol and its nesting level
 */
private static SymbolPosition findRightmostSymbol(Ast.Node n, Lex.Symbol currentRightmost, int currentLevel) {
    Ast.Node node = n;
    Lex.Symbol rightmost = currentRightmost;
    int level = currentLevel;

    while (true) {
        switch (node) {
            case Ast.Group g -> {
                // Entering a group always adds one nesting level, even when it is empty.
                level++;
                if (g.operands().isEmpty()) {
                    return new SymbolPosition(rightmost, level);
                }
                node = g.operands().getLast();
            }
            case Ast.Code c -> {
                // The code key becomes the fallback symbol in case the operand is an empty group.
                rightmost = c.code();
                node = c.operand();
            }
            case Ast.Leaf l -> {
                return new SymbolPosition(l.value(), level);
            }
            case Ast.Not not -> node = not.operand();
        }
    }
}

/**
 * Computes the exclusive end index, within {@code q}, of a code segment given its rightmost symbol.
 * <ul>
 *   <li>If the symbol sits inside {@code nestedLevel} groups, the segment runs through that many
 *       closing brackets after the symbol.</li>
 *   <li>If it is a single token directly wrapped in brackets, e.g. {@code k:(v)}, the segment runs
 *       through the next closing bracket.</li>
 *   <li>Otherwise the segment runs up to the next whitespace.</li>
 * </ul>
 * In every case the end of the query string is used when the expected delimiter is missing.
 *
 * NOTE(review): only one pair of redundant brackets around a single token is accounted for;
 * confirm whether input such as {@code k:((v))} can reach this method.
 *
 * @param q              the original query string
 * @param symbolPosition the rightmost symbol of the segment and its nesting level
 * @return the index one past the last character of the segment
 */
private static int findSegmentEndIdx(String q, SymbolPosition symbolPosition) {
    var level = symbolPosition.nestedLevel();
    var symbol = symbolPosition.symbol();
    var endIdx = symbol.offset() + symbol.value().length();
    if (symbol.isQuoted()) {
        // A quoted symbol's value is treated as excluding its two quote characters (isInBrackets
        // applies the same +2). Without this, scanning would start inside the quoted text and
        // could stop at a ')' or whitespace that belongs to the value itself.
        // Clamped so that substring() below can never be handed an out-of-range index.
        endIdx = Math.min(endIdx + 2, q.length());
    }
    var rest = q.substring(endIdx);

    if (level > 0) {
        var closing = findNthCharOccurrence(rest, level, c -> c == ')');
        return closing == -1 ? q.length() : endIdx + closing + 1;
    }
    // Single token within brackets, for example k:(v)
    if (isInBrackets(symbol, q)) {
        var closing = findNthCharOccurrence(rest, 1, c -> c == ')');
        return closing == -1 ? q.length() : endIdx + closing + 1;
    }
    var nextWhitespace = findNthCharOccurrence(rest, 1, Character::isWhitespace);
    return nextWhitespace == -1 ? q.length() : endIdx + nextWhitespace;
}

/**
 * Checks whether a symbol is directly enclosed in a pair of brackets, ignoring whitespace
 * between the symbol and the brackets, e.g. {@code ( v )}.
 *
 * @param symbol the symbol to inspect
 * @param q      the query string the symbol was lexed from
 * @return {@code true} if the nearest non-whitespace character before the symbol is {@code '('}
 *         and the nearest non-whitespace character after it is {@code ')'}
 */
private static boolean isInBrackets(Lex.Symbol symbol, String q) {
    // Look left: skip whitespace, then require an opening bracket.
    int before = symbol.offset() - 1;
    while (before >= 0 && Character.isWhitespace(q.charAt(before))) {
        before--;
    }
    if (before < 0 || q.charAt(before) != '(') {
        return false;
    }

    // Look right: a quoted symbol occupies two extra characters for its quotes.
    int after = symbol.offset() + symbol.value().length() + (symbol.isQuoted() ? 2 : 0);
    while (after < q.length() && Character.isWhitespace(q.charAt(after))) {
        after++;
    }
    return after < q.length() && q.charAt(after) == ')';
}

/**
 * Finds the position of the n-th character in {@code s} that satisfies {@code charTest}.
 *
 * @param s        the string to scan from left to right
 * @param n        which match to report (1 = first match)
 * @param charTest the test applied to each character
 * @return the index of the n-th matching character, or -1 if there are fewer than {@code n}
 *         matches (which is always the case for {@code n <= 0})
 */
private static int findNthCharOccurrence(String s, int n, Predicate<Character> charTest) {
    int remaining = n;
    int idx = 0;

    while (idx < s.length()) {
        if (charTest.test(s.charAt(idx)) && --remaining == 0) {
            return idx;
        }
        idx++;
    }

    return -1;
}
}
12 changes: 0 additions & 12 deletions whelk-core/src/test/groovy/whelk/search2/parse/AstSpec.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -141,18 +141,6 @@ class AstSpec extends Specification {
)
}

def "Fail code of code"() {
given:
def input = "AAA:(BBB:CCC)"
def lexedSymbols = Lex.lexQuery(input)
Parse.OrComb parseTree = Parse.parseQuery(lexedSymbols)

when:
Ast.buildFrom(parseTree)
then:
thrown InvalidQueryException
}

def "empty group as string"() {
given:
def input = "AAA OR ()"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@ package whelk.search2.querytree

import spock.lang.Specification
import whelk.JsonLd
import whelk.search2.AppParams
import whelk.search2.Disambiguate
import whelk.search2.ESSettings
import whelk.search2.Query
import whelk.search2.Operator
import whelk.search2.QueryParams
import whelk.search2.SelectedFacets

class QueryTreeSpec extends Specification {
Disambiguate disambiguate = TestData.getDisambiguate()
JsonLd jsonLd = TestData.getJsonLd()
static Disambiguate disambiguate = TestData.getDisambiguate()
static JsonLd jsonLd = TestData.getJsonLd()

static var p1 = new Property("p1", jsonLd)
static var p1v1 = new Condition(p1, Operator.EQUALS, new FreeText("v1"))
static var p1v2 = new Condition(p1, Operator.EQUALS, new FreeText("v2"))

def "back to query string"() {
expect:
Expand Down Expand Up @@ -75,6 +76,40 @@ class QueryTreeSpec extends Specification {
"category:\"https://id.kb.se/term/ktg/Z\"" | "category:\"https://id.kb.se/term/ktg/Z\""
}

// Verifies that a code segment which cannot be used as a condition is kept verbatim as free text
// instead of failing the query. Two situations are covered by the rows below:
//  - segments keyed by "k" (presumably not a mapped key in TestData -- confirm), in various
//    spacing, bracket and quoting combinations, alone and next to valid p1 conditions;
//  - codes nested inside a code group (the p1:(p1:...) rows), where the whole outer segment
//    becomes free text.
def "treat invalid code segment as free text"() {
expect:
new QueryTree(input, disambiguate).tree() == parsed

// Left column: raw query string. Right column: the expected tree.
where:
input | parsed
"k:v" | new FreeText("k:v")
"k=v" | new FreeText("k=v")
"k : v" | new FreeText("k : v")
"k :v" | new FreeText("k :v")
"k: v" | new FreeText("k: v")
"k:()" | new FreeText("k:()")
"k : ()" | new FreeText("k : ()")
"k :()" | new FreeText("k :()")
"k: ()" | new FreeText("k: ()")
"k:(v)" | new FreeText("k:(v)")
"k:(v )" | new FreeText("k:(v )")
"k:( v )" | new FreeText("k:( v )")
"k:( v)" | new FreeText("k:( v)")
"k:( \"v\" )" | new FreeText("k:( \"v\" )")
"k:(\"v\" )" | new FreeText("k:(\"v\" )")
"k:( \"v\")" | new FreeText("k:( \"v\")")
"k:(k : v)" | new FreeText("k:(k : v)")
"k:(x OR (a b))" | new FreeText("k:(x OR (a b))")
"x k:(x OR (a b) ) y" | new FreeText("x k:(x OR (a b) ) y")
"p1:v1 k:v x" | new And([p1v1, new FreeText("k:v x")])
"k:v x p1:v1" | new And([new FreeText("k:v x"), p1v1])
"k:(v) p1:v1" | new And([new FreeText("k:(v)"), p1v1])
"k:(a (b OR c)) p1:v1" | new And([new FreeText("k:(a (b OR c))"), p1v1])
"p1:v1 x k:(\"a\" (b OR c)) p1:v2" | new And([p1v1, new FreeText("x k:(\"a\" (b OR c))"), p1v2])
"p1:(p1:v1)" | new FreeText("p1:(p1:v1)")
"p1:(p1:v1 p1:v2) x p1:v1" | new And([new FreeText("p1:(p1:v1 p1:v2) x"), p1v1])
}

def "to search mapping"() {
given:
def tree = QueryTreeBuilder.buildTree('something (NOT p3:v3 OR p4:"v:4") includeA', disambiguate)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class TestData {
'identifycategory': ['librissearch:identifyCategory'] as Set,
'nonecategory' : ['librissearch:noneCategory'] as Set,
'p3p1' : ['p3p1'] as Set,
'bibliography' : ['bibliography'] as Set
'bibliography' : ['bibliography'] as Set,
'meta' : ['meta'] as Set
]
def classMappings = [
't1' : ['T1'] as Set,
Expand Down
Loading