Skip to content

Commit

Permalink
Add auto fuzziness object to FuzzyQuery from FuzzyParams (#692)
Browse files Browse the repository at this point in the history
* Add auto fuzziness object to FuzzyQuery from FuzzyParams

* Move AutoFuzziness to a top level message

* Use the nested message without redefining it
  • Loading branch information
vim345 authored Aug 15, 2024
1 parent 5a5a82f commit ef18628
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 17 deletions.
1 change: 1 addition & 0 deletions clientlib/src/main/proto/yelp/nrtsearch/search.proto
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ message FuzzyQuery {
RewriteMethod rewrite = 7;
// Specifies the size to use for the TOP_TERMS* rewrite methods.
int32 rewriteTopTermsSize = 8;
FuzzyParams.AutoFuzziness auto = 9; // Auto fuzziness which determines the max edits based on the term length. AUTO is the preferred setting. Either set this or maxEdits.
}

// Message for a SpanMultiTermQuery
Expand Down
7 changes: 7 additions & 0 deletions docs/queries/span_multi_term_query_wrapper.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ Proto definition:
int32 maxExpansions = 4;
// True if transpositions should be treated as a primitive edit operation. If this is false, comparisons will implement the classic Levenshtein algorithm. Default is true.
bool transpositions = 5;
AutoFuzziness auto = 9; // Auto fuzziness which determines the max edits based on the term length. AUTO is the preferred setting. Either set this or maxEdits.
// Optional low and high values for auto fuzziness. Defaults to low: 3 and high: 6 if both are unset. Valid values are low >= 0 and low < high
message FuzzyParams.AutoFuzziness {
int32 low = 10; // Optional low distance argument.
int32 high = 11; // Optional high distance argument.
}
}
// A query that matches documents that contain a specific prefix in a provided field.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package com.yelp.nrtsearch.server.luceneserver;

import static com.yelp.nrtsearch.server.luceneserver.analysis.AnalyzerCreator.isAnalyzerDefined;
import static com.yelp.nrtsearch.server.utils.QueryUtils.computeMaxEditsFromTermLength;

import com.yelp.nrtsearch.server.grpc.*;
import com.yelp.nrtsearch.server.grpc.MultiMatchQuery.MatchType;
Expand Down Expand Up @@ -719,8 +720,14 @@ private static FuzzyQuery getFuzzyQuery(
protoSpanMultiTermQuery.getFuzzyQuery();
Term term = new Term(protoFuzzyQuery.getField(), protoFuzzyQuery.getText());

int maxEdits =
protoFuzzyQuery.hasMaxEdits() ? protoFuzzyQuery.getMaxEdits() : FuzzyQuery.defaultMaxEdits;
int maxEdits = FuzzyQuery.defaultMaxEdits;
if (protoFuzzyQuery.hasAuto()) {
maxEdits = computeMaxEditsFromTermLength(term, protoFuzzyQuery.getAuto());
} else {
if (protoFuzzyQuery.hasMaxEdits()) {
maxEdits = protoFuzzyQuery.getMaxEdits();
}
}

int prefixLength =
protoFuzzyQuery.hasPrefixLength()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,9 @@

import static org.junit.Assert.assertEquals;

import com.yelp.nrtsearch.server.grpc.AddDocumentRequest;
import com.yelp.nrtsearch.server.grpc.*;
import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField;
import com.yelp.nrtsearch.server.grpc.FieldDefRequest;
import com.yelp.nrtsearch.server.grpc.FuzzyQuery;
import com.yelp.nrtsearch.server.grpc.PrefixQuery;
import com.yelp.nrtsearch.server.grpc.Query;
import com.yelp.nrtsearch.server.grpc.RegexpQuery;
import com.yelp.nrtsearch.server.grpc.SearchRequest;
import com.yelp.nrtsearch.server.grpc.SearchResponse;
import com.yelp.nrtsearch.server.grpc.SearchResponse.Hit;
import com.yelp.nrtsearch.server.grpc.SpanMultiTermQuery;
import com.yelp.nrtsearch.server.grpc.SpanNearQuery;
import com.yelp.nrtsearch.server.grpc.SpanQuery;
import com.yelp.nrtsearch.server.grpc.TermQuery;
import com.yelp.nrtsearch.server.grpc.TermRangeQuery;
import com.yelp.nrtsearch.server.grpc.WildcardQuery;
import com.yelp.nrtsearch.server.luceneserver.ServerTestCase;
import io.grpc.testing.GrpcCleanupRule;
import java.io.IOException;
Expand Down Expand Up @@ -263,7 +250,7 @@ public void testSpanMultiTermQueryFuzzyQuery() {
@Test
public void testSpanMultiTermQueryFuzzyQueryMaxEdit() {

// Create a WildcardQuery object that should only match tomato
// Create a fuzzy query object that should only match tomato
FuzzyQuery fuzzyQuery =
FuzzyQuery.newBuilder().setField("text_field").setText("tomata").setMaxEdits(1).build();

Expand All @@ -277,6 +264,29 @@ public void testSpanMultiTermQueryFuzzyQueryMaxEdit() {
assertIds(response, 4);
}

@Test
public void testSpanMultiTermQueryFuzzyQueryAutoFuzziness() {
  // No explicit maxEdits is set here: the fuzzy query carries an AutoFuzziness message
  // (low = 3, high = 6) instead. The misspelled term "tomata" should only match tomato.
  FuzzyParams.AutoFuzziness fuzziness =
      FuzzyParams.AutoFuzziness.newBuilder().setLow(3).setHigh(6).build();

  // Wrap the fuzzy query in a SpanMultiTermQuery, and that in turn in a SpanQuery.
  SpanQuery autoFuzzySpanQuery =
      SpanQuery.newBuilder()
          .setSpanMultiTermQuery(
              SpanMultiTermQuery.newBuilder()
                  .setFuzzyQuery(
                      FuzzyQuery.newBuilder()
                          .setField("text_field")
                          .setText("tomata")
                          .setAuto(fuzziness)
                          .build())
                  .build())
          .build();

  SearchResponse searchResponse =
      getGrpcServer().getBlockingStub().search(getSearchRequest(autoFuzzySpanQuery));

  assertIds(searchResponse, 4);
}

@Test
public void testSpanMultiTermQueryFuzzyQueryAllParamsSet() {

Expand Down

0 comments on commit ef18628

Please sign in to comment.