Skip to content

Commit

Permalink
Fixes #4165
Browse files Browse the repository at this point in the history
Closes #4164

Added new test document and updated assertions

Use tokenizer that mimics Solr's standardized tokenizer

Geo predicate tweaking

Implemented TextContainsPhrase for Solr

Signed-off-by: Allan Clements <[email protected]>
  • Loading branch information
criminosis committed Jan 12, 2025
1 parent e4e1520 commit 02b0e02
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -228,13 +228,16 @@ private void storeTest(String... stores) throws Exception {
true);
final Multimap<String, Object> doc3 = getDocument("Hello Bob, are you there?", -500, 10.1, Geoshape.point(47.0, 10.0), Geoshape.box(46.9, 9.9, 47.1, 10.1), Arrays.asList("7", "8", "9"), Sets.newHashSet("7", "8"), Instant.ofEpochSecond(3),
false);
final Multimap<String, Object> doc4 = getDocument("foo.com bar/test", -1001, 2, Geoshape.point(0, 0.0), Geoshape.box(46.6, 0, 46.9, 0.1), Arrays.asList("10", "11", "12"), Sets.newHashSet("9", "10"), Instant.ofEpochSecond(0),
false);

for (final String store : stores) {
initialize(store);

add(store, "doc1", doc1, true);
add(store, "doc2", doc2, true);
add(store, "doc3", doc3, false);
add(store, "doc3", doc3, true);
add(store, "doc4", doc4, false);

}

Expand Down Expand Up @@ -262,23 +265,24 @@ private void storeTest(String... stores) throws Exception {
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "worl"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "Tomorrow world"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "WorLD HELLO"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "foo.com"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS_FUZZY, "boby"))).count());

assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "A"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "A"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "z"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN, "world"))).count());

assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "A"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "A"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "z"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.GREATER_THAN_EQUAL, "world"))).count());

assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "A"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "z"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "world"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "z"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN, "world"))).count());

assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "A"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "z"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "world"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "z"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Cmp.LESS_THAN_EQUAL, "world"))).count());

//Ordering
result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(TEXT, Text.CONTAINS, "world"), orderTimeDesc))
Expand Down Expand Up @@ -357,25 +361,25 @@ private void storeTest(String... stores) throws Exception {
//String
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.EQUAL, "Tomorrow is the world"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.EQUAL, "world"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.NOT_EQUAL, "bob"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.NOT_EQUAL, "bob"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.PREFIX, "Tomorrow"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.PREFIX, "wor"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Text.FUZZY, "Tomorow is the world"))).count());

assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "A"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "A"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "z"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "Hello world"))).count());
assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN, "Hello world"))).count());

assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "A"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "A"))).count());
assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "z"))).count());
assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "Hello world"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.GREATER_THAN_EQUAL, "Hello world"))).count());

assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "A"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "z"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "z"))).count());
assertEquals(1, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN, "Hello world"))).count());

assertEquals(0, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "A"))).count());
assertEquals(3, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "z"))).count());
assertEquals(4, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "z"))).count());
assertEquals(2, tx.queryStream(new IndexQuery(store, PredicateCondition.of(NAME, Cmp.LESS_THAN_EQUAL, "Hello world"))).count());

try {
Expand Down Expand Up @@ -413,8 +417,7 @@ private void storeTest(String... stores) throws Exception {
assertEquals(2, result.size());

result = tx.queryStream(new IndexQuery(store, Not.of(PredicateCondition.of(TEXT, Text.CONTAINS, "world")))).collect(Collectors.toList());
assertEquals(1, result.size());
assertEquals("doc3", result.get(0));
assertEquals(ImmutableSet.of("doc3", "doc4"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, And.of(PredicateCondition.of(TIME, Cmp.EQUAL, -500), Not.of(PredicateCondition.of(TEXT, Text.CONTAINS, "world"))))).collect(Collectors.toList());
assertEquals(1, result.size());
Expand Down Expand Up @@ -449,8 +452,8 @@ private void storeTest(String... stores) throws Exception {
assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.box(46.5, -0.5, 50.5, 10.5)))).collect(Collectors.toList());
assertEquals(3,result.size());
assertEquals(ImmutableSet.of("doc1", "doc2", "doc3"), ImmutableSet.copyOf(result));
assertEquals(4, result.size());
assertEquals(ImmutableSet.of("doc1", "doc2", "doc3", "doc4"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.WITHIN, Geoshape.circle(48.5, 0.5, 200.00)))).collect(Collectors.toList());
assertEquals(2, result.size());
Expand All @@ -471,8 +474,8 @@ private void storeTest(String... stores) throws Exception {

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.DISJOINT, Geoshape.polygon(Arrays.asList(new double[][]
{{-5.0,47.0},{5.0,47.0},{5.0,50.0},{-5.0,50.0},{-5.0,47.0}}))))).collect(Collectors.toList());
assertEquals(1, result.size());
assertEquals(ImmutableSet.of("doc3"), ImmutableSet.copyOf(result));
assertEquals(2, result.size());
assertEquals(ImmutableSet.of("doc3", "doc4"), ImmutableSet.copyOf(result));
}

if (indexFeatures.supportsGeoContains()) {
Expand All @@ -486,8 +489,8 @@ private void storeTest(String... stores) throws Exception {
assertEquals(ImmutableSet.of("doc1","doc2"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.circle(48.5, 0.5, 200.00)))).collect(Collectors.toList());
assertEquals(2, result.size());
assertEquals(ImmutableSet.of("doc1", "doc2"), ImmutableSet.copyOf(result));
assertEquals(3, result.size());
assertEquals(ImmutableSet.of("doc1", "doc2", "doc4"), ImmutableSet.copyOf(result));

result = tx.queryStream(new IndexQuery(store, PredicateCondition.of(BOUNDARY, Geo.INTERSECT, Geoshape.polygon(Arrays.asList(new double[][] {{-1.0,48.0},{2.0,48.0},{2.0,49.0},{-1.0,49.0},{-1.0,48.0}}))))).collect(Collectors.toList());
assertEquals(2, result.size());
Expand Down Expand Up @@ -516,13 +519,13 @@ private void storeTest(String... stores) throws Exception {
assertEquals(2, tx.queryStream(new RawQuery(store,"text:\"world\"",NO_PARAS)).count());
assertEquals(2, tx.queryStream(new RawQuery(store,"time:[1000 TO 1020]",NO_PARAS)).count());
assertEquals(2, tx.queryStream(new RawQuery(store,"time:[1000 TO *]",NO_PARAS)).count());
assertEquals(3, tx.queryStream(new RawQuery(store,"time:[* TO *]",NO_PARAS)).count());
assertEquals(4, tx.queryStream(new RawQuery(store,"time:[* TO *]",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store,"weight:[5.1 TO 8.3]",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store,"weight:5.2",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store,"text:world AND time:1001",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store,"name:\"Hello world\"",NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store, "boolean:true", NO_PARAS)).count());
assertEquals(2, tx.queryStream(new RawQuery(store, "boolean:false", NO_PARAS)).count());
assertEquals(3, tx.queryStream(new RawQuery(store, "boolean:false", NO_PARAS)).count());
assertEquals(2, tx.queryStream(new RawQuery(store, "date:{1970-01-01T00:00:01Z TO 1970-01-01T00:00:03Z]", NO_PARAS)).count());
assertEquals(3, tx.queryStream(new RawQuery(store, "date:[1970-01-01T00:00:01Z TO *]", NO_PARAS)).count());
assertEquals(1, tx.queryStream(new RawQuery(store, "date:\"1970-01-01T00:00:02Z\"", NO_PARAS)).count());
Expand Down Expand Up @@ -558,9 +561,9 @@ private void storeTest(String... stores) throws Exception {
assertEquals("doc3", tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.EQUAL, Instant.ofEpochSecond(3)))).findFirst().get());
assertEquals("doc3", tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.GREATER_THAN, Instant.ofEpochSecond(2)))).findFirst().get());
assertEquals(ImmutableSet.of("doc2", "doc3"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.GREATER_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc2"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc3"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.NOT_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc2", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.LESS_THAN_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));
assertEquals(ImmutableSet.of("doc1", "doc3", "doc4"), tx.queryStream(new IndexQuery(store, PredicateCondition.of(DATE, Cmp.NOT_EQUAL, Instant.ofEpochSecond(2)))).collect(Collectors.toSet()));


//Update some data
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
import org.apache.http.impl.auth.KerberosScheme;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.solr.client.solrj.SolrClient;
Expand Down Expand Up @@ -95,6 +98,7 @@
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.lang.reflect.Constructor;
import java.text.DateFormat;
Expand Down Expand Up @@ -862,7 +866,8 @@ public String buildQueryFilter(Condition<JanusGraphElement> condition, KeyInform
return tokenize(ParameterType.TEXT_ANALYZER, information, value, key, predicate);
} else if (predicate == Text.PREFIX || predicate == Text.CONTAINS_PREFIX
|| predicate == Text.REGEX || predicate == Text.CONTAINS_REGEX
|| predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY) {
|| predicate == Text.FUZZY || predicate == Text.CONTAINS_FUZZY
|| predicate == Text.CONTAINS_PHRASE) {
return buildQueryFilterStringValue(key, (String) value, predicate, information);
} else if (predicate == Cmp.LESS_THAN || predicate == Cmp.LESS_THAN_EQUAL
|| predicate == Cmp.GREATER_THAN || predicate == Cmp.GREATER_THAN_EQUAL) {
Expand Down Expand Up @@ -991,6 +996,8 @@ public String buildQueryFilterStringValue(String key, String value, JanusGraphPr
return (stringKey + ":" + escapeValue(value) + "*");
} else if (predicate == Text.CONTAINS_PREFIX) {
return (key + ":" + escapeValue(value) + "*");
} else if (predicate == Text.CONTAINS_PHRASE) {
return (key + ":\"" + escapeValue(value) + "\"");
} else if (predicate == Text.REGEX) {
return (stringKey + ":/" + value + "/");
} else if (predicate == Text.CONTAINS_REGEX) {
Expand Down Expand Up @@ -1027,7 +1034,8 @@ private String tokenize(ParameterType parameterType, KeyInformation.StoreRetriev
if (analyzer != null) {
terms = customTokenize(analyzer, key, (String) value);
} else if (parameterType == ParameterType.TEXT_ANALYZER) {
terms = Text.tokenize((String) value);
//If a custom tokenizer was not specified, assume the standard one as defined in the default Solr Configset
terms = standardTokenizer((String) value);
} else {
return buildQueryFilterStringValue(key, (String) value, janusgraphPredicate, information);
}
Expand Down Expand Up @@ -1162,13 +1170,15 @@ public boolean supports(KeyInformation information, JanusGraphPredicate predicat
case DEFAULT:
case TEXT:
return predicate == Text.CONTAINS || predicate == Text.CONTAINS_PREFIX
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY;
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY
|| predicate == Text.CONTAINS_PHRASE;
case STRING:
return predicate instanceof Cmp || predicate==Text.REGEX || predicate==Text.PREFIX || predicate == Text.FUZZY;
case TEXTSTRING:
return predicate instanceof Cmp || predicate == Text.REGEX || predicate == Text.PREFIX || predicate == Text.FUZZY
|| predicate == Text.CONTAINS || predicate == Text.CONTAINS_PREFIX
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY;
|| predicate == Text.CONTAINS_REGEX || predicate == Text.CONTAINS_FUZZY
|| predicate == Text.CONTAINS_PHRASE;
}
} else if (dataType == Date.class || dataType == Instant.class) {
return predicate instanceof Cmp;
Expand Down Expand Up @@ -1267,6 +1277,20 @@ public boolean exists() throws BackendException {
/*
################# UTILITY METHODS #######################
*/
/**
 * Splits {@code text} into terms using Lucene's {@link StandardTokenizer} and returns
 * the terms in the order the tokenizer emits them.
 *
 * <p>Only the tokenizer is applied here; no further token filters are chained after it
 * in this method.
 *
 * @param text the string to tokenize; it is wrapped in a {@link StringReader}
 * @return a new mutable list holding the string form of every emitted term; empty when
 *         the tokenizer emits no tokens
 * @throws UncheckedIOException if the tokenizer raises an {@link IOException}
 */
static List<String> standardTokenizer(String text) {
List<String> result = new ArrayList<>();
// try-with-resources closes the tokenizer on both the normal and the exceptional path.
try (Tokenizer tokenizer = new StandardTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY)) {
tokenizer.setReader(new StringReader(text));
// The attribute instance is obtained once; its content is read after each incrementToken().
CharTermAttribute attr = tokenizer.addAttribute(CharTermAttribute.class);
// reset() is invoked before the first incrementToken() call.
tokenizer.reset();
while (tokenizer.incrementToken()) {
// toString() snapshots the current term text into a new String before the next advance.
result.add(attr.toString());
}
// NOTE(review): Lucene's documented TokenStream workflow calls end() after the last
// incrementToken(); it is not called here -- confirm whether that is intentional.
return result;
} catch (IOException e) {
// Rethrown unchecked (cause preserved) so callers need not declare IOException.
throw new UncheckedIOException(e);

Check warning on line 1291 in janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java

View check run for this annotation

Codecov / codecov/patch

janusgraph-solr/src/main/java/org/janusgraph/diskstorage/solr/SolrIndex.java#L1290-L1291

Added lines #L1290 - L1291 were not covered by tests
}
}

static Optional<String> getDualFieldName(String fieldKey, KeyInformation ki) {
if (AttributeUtils.isString(ki.getDataType()) && Mapping.getMapping(ki) == Mapping.TEXTSTRING) {
Expand Down
Loading

0 comments on commit 02b0e02

Please sign in to comment.