From ef18628bb63e04617cda088cda7531178c8d9236 Mon Sep 17 00:00:00 2001 From: Mohammad Mohtasham Date: Thu, 15 Aug 2024 14:53:16 -0700 Subject: [PATCH] Add auto fuzziness object to FuzzyQuery from FuzzyParams (#692) * Add auto fuzziness object to FuzzyQuery from FuzzyParams * Move AutoFuzziness to a top level message * Use the nested message without redefining it --- .../main/proto/yelp/nrtsearch/search.proto | 1 + .../queries/span_multi_term_query_wrapper.rst | 7 ++++ .../server/luceneserver/QueryNodeMapper.java | 11 ++++- .../search/query/SpanQueryTest.java | 40 ++++++++++++------- 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/clientlib/src/main/proto/yelp/nrtsearch/search.proto b/clientlib/src/main/proto/yelp/nrtsearch/search.proto index 230e4f4d8..90612fdef 100644 --- a/clientlib/src/main/proto/yelp/nrtsearch/search.proto +++ b/clientlib/src/main/proto/yelp/nrtsearch/search.proto @@ -455,6 +455,7 @@ message FuzzyQuery { RewriteMethod rewrite = 7; // Specifies the size to use for the TOP_TERMS* rewrite methods. int32 rewriteTopTermsSize = 8; + FuzzyParams.AutoFuzziness auto = 9; // Auto fuzziness which determines the max edits based on the term length. AUTO is the preferred setting. Either set this or maxEdits. } // Message for a SpanMultiTermQuery diff --git a/docs/queries/span_multi_term_query_wrapper.rst b/docs/queries/span_multi_term_query_wrapper.rst index eec525339..0a7885e3c 100644 --- a/docs/queries/span_multi_term_query_wrapper.rst +++ b/docs/queries/span_multi_term_query_wrapper.rst @@ -47,6 +47,13 @@ Proto definition: int32 maxExpansions = 4; // True if transpositions should be treated as a primitive edit operation. If this is false, comparisons will implement the classic Levenshtein algorithm. Default is true. bool transpositions = 5; + AutoFuzziness auto = 9; // Auto fuzziness which determines the max edits based on the term length. AUTO is the preferred setting. Either set this or maxEdits. 
+ + // Optional low and high values for auto fuzziness. Defaults to low: 3 and high: 6 if both are unset. Valid values are low >= 0 and low < high + message FuzzyParams.AutoFuzziness { + int32 low = 10; // Optional low distance argument. + int32 high = 11; // Optional high distance argument. + } } // A query that matches documents that contain a specific prefix in a provided field. diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/QueryNodeMapper.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/QueryNodeMapper.java index 3e034b927..97f2c1ce3 100644 --- a/src/main/java/com/yelp/nrtsearch/server/luceneserver/QueryNodeMapper.java +++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/QueryNodeMapper.java @@ -16,6 +16,7 @@ package com.yelp.nrtsearch.server.luceneserver; import static com.yelp.nrtsearch.server.luceneserver.analysis.AnalyzerCreator.isAnalyzerDefined; +import static com.yelp.nrtsearch.server.utils.QueryUtils.computeMaxEditsFromTermLength; import com.yelp.nrtsearch.server.grpc.*; import com.yelp.nrtsearch.server.grpc.MultiMatchQuery.MatchType; @@ -719,8 +720,14 @@ private static FuzzyQuery getFuzzyQuery( protoSpanMultiTermQuery.getFuzzyQuery(); Term term = new Term(protoFuzzyQuery.getField(), protoFuzzyQuery.getText()); - int maxEdits = - protoFuzzyQuery.hasMaxEdits() ? 
protoFuzzyQuery.getMaxEdits() : FuzzyQuery.defaultMaxEdits; + int maxEdits = FuzzyQuery.defaultMaxEdits; + if (protoFuzzyQuery.hasAuto()) { + maxEdits = computeMaxEditsFromTermLength(term, protoFuzzyQuery.getAuto()); + } else { + if (protoFuzzyQuery.hasMaxEdits()) { + maxEdits = protoFuzzyQuery.getMaxEdits(); + } + } int prefixLength = protoFuzzyQuery.hasPrefixLength() diff --git a/src/test/java/com/yelp/nrtsearch/server/luceneserver/search/query/SpanQueryTest.java b/src/test/java/com/yelp/nrtsearch/server/luceneserver/search/query/SpanQueryTest.java index 3767242aa..fb2530070 100644 --- a/src/test/java/com/yelp/nrtsearch/server/luceneserver/search/query/SpanQueryTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/luceneserver/search/query/SpanQueryTest.java @@ -17,22 +17,9 @@ import static org.junit.Assert.assertEquals; -import com.yelp.nrtsearch.server.grpc.AddDocumentRequest; +import com.yelp.nrtsearch.server.grpc.*; import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField; -import com.yelp.nrtsearch.server.grpc.FieldDefRequest; -import com.yelp.nrtsearch.server.grpc.FuzzyQuery; -import com.yelp.nrtsearch.server.grpc.PrefixQuery; -import com.yelp.nrtsearch.server.grpc.Query; -import com.yelp.nrtsearch.server.grpc.RegexpQuery; -import com.yelp.nrtsearch.server.grpc.SearchRequest; -import com.yelp.nrtsearch.server.grpc.SearchResponse; import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit; -import com.yelp.nrtsearch.server.grpc.SpanMultiTermQuery; -import com.yelp.nrtsearch.server.grpc.SpanNearQuery; -import com.yelp.nrtsearch.server.grpc.SpanQuery; -import com.yelp.nrtsearch.server.grpc.TermQuery; -import com.yelp.nrtsearch.server.grpc.TermRangeQuery; -import com.yelp.nrtsearch.server.grpc.WildcardQuery; import com.yelp.nrtsearch.server.luceneserver.ServerTestCase; import io.grpc.testing.GrpcCleanupRule; import java.io.IOException; @@ -263,7 +250,7 @@ public void testSpanMultiTermQueryFuzzyQuery() { @Test public void 
testSpanMultiTermQueryFuzzyQueryMaxEdit() { - // Create a WildcardQuery object that should only match tomato + // Create a fuzzy query object that should only match tomato FuzzyQuery fuzzyQuery = FuzzyQuery.newBuilder().setField("text_field").setText("tomata").setMaxEdits(1).build(); @@ -277,6 +264,29 @@ public void testSpanMultiTermQueryFuzzyQueryMaxEdit() { assertIds(response, 4); } + @Test + public void testSpanMultiTermQueryFuzzyQueryAutoFuzziness() { + + // Create a fuzzy query object without max edits that should only match tomato. + FuzzyParams.AutoFuzziness autoFuzziness = + FuzzyParams.AutoFuzziness.newBuilder().setLow(3).setHigh(6).build(); + FuzzyQuery fuzzyQuery = + FuzzyQuery.newBuilder() + .setField("text_field") + .setText("tomata") + .setAuto(autoFuzziness) + .build(); + + SpanMultiTermQuery spanMultiTermQuery = + SpanMultiTermQuery.newBuilder().setFuzzyQuery(fuzzyQuery).build(); + + SpanQuery spanQuery = SpanQuery.newBuilder().setSpanMultiTermQuery(spanMultiTermQuery).build(); + + SearchResponse response = getGrpcServer().getBlockingStub().search(getSearchRequest(spanQuery)); + + assertIds(response, 4); + } + @Test public void testSpanMultiTermQueryFuzzyQueryAllParamsSet() {