From ed29d9f418a0c5991a3f7174572e52ebe612a5b6 Mon Sep 17 00:00:00 2001 From: swethakann Date: Tue, 16 Apr 2024 14:23:27 -0700 Subject: [PATCH] Add zeroTermsQuery for matchPhraseQuery (#648) Add zeroTermsQuery for matchPhraseQuery --- .../main/proto/yelp/nrtsearch/search.proto | 9 ++++ docs/queries/match_phrase.rst | 9 ++++ .../server/luceneserver/QueryNodeMapper.java | 13 +++++- .../yelp/nrtsearch/server/grpc/QueryTest.java | 42 +++++++++++++++++++ 4 files changed, 72 insertions(+), 1 deletion(-) diff --git a/clientlib/src/main/proto/yelp/nrtsearch/search.proto b/clientlib/src/main/proto/yelp/nrtsearch/search.proto index 815d3d76f..a43eb37fe 100644 --- a/clientlib/src/main/proto/yelp/nrtsearch/search.proto +++ b/clientlib/src/main/proto/yelp/nrtsearch/search.proto @@ -190,6 +190,15 @@ message MatchPhraseQuery { string query = 2; // The text to query with. int32 slop = 3; // Edit distance between respective positions of tokens generated by analyzing this query and the positions of terms in a document. Analyzer analyzer = 4; // Analyzer used to analyze the query. If not provided, the default search analyzer for the field would be used instead. + ZeroTerms zeroTermsQuery = 5; // Indicates whether none or all documents are returned if the analyzer removes all tokens. Valid values are NONE_ZERO_TERMS and ALL_ZERO_TERMS. + + // Zero Terms options when analyzer removes all tokens. + enum ZeroTerms { + // No documents are returned if the analyzer removes all tokens. + NONE_ZERO_TERMS = 0; + // All documents are returned if the analyzer removes all tokens. + ALL_ZERO_TERMS = 1; + } } // A query that matches documents containing terms in the same order as those in the analyzed query string. 
The final analyzed token is treated as a prefix diff --git a/docs/queries/match_phrase.rst b/docs/queries/match_phrase.rst index 2692dd397..740d8e6f0 100644 --- a/docs/queries/match_phrase.rst +++ b/docs/queries/match_phrase.rst @@ -12,4 +12,13 @@ Proto definition: string query = 2; // The text to query with. int32 slop = 3; // Edit distance between respective positions of tokens generated by analyzing this query and the positions of terms in a document. Analyzer analyzer = 4; // Analyzer used to analyze the query. If not provided, the default search analyzer for the field would be used instead. + ZeroTerms zeroTermsQuery = 5; // Indicates whether none or all documents are returned if the analyzer removes all tokens. Valid values are NONE_ZERO_TERMS and ALL_ZERO_TERMS. + + // Zero Terms options when analyzer removes all tokens. + enum ZeroTerms { + // No documents are returned if the analyzer removes all tokens. + NONE_ZERO_TERMS = 0; + // All documents are returned if the analyzer removes all tokens. + ALL_ZERO_TERMS = 1; + } } \ No newline at end of file diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/QueryNodeMapper.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/QueryNodeMapper.java index 470ee7f81..9ab163c06 100644 --- a/src/main/java/com/yelp/nrtsearch/server/luceneserver/QueryNodeMapper.java +++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/QueryNodeMapper.java @@ -393,7 +393,18 @@ private Query getMatchPhraseQuery(MatchPhraseQuery matchPhraseQuery, IndexState // This can happen if there are no tokens found after analyzing the query text if (phraseQuery == null) { - return new MatchNoDocsQuery(); + MatchPhraseQuery.ZeroTerms zeroTermsQuery = matchPhraseQuery.getZeroTermsQuery(); + switch (zeroTermsQuery) { + case NONE_ZERO_TERMS -> { + return new MatchNoDocsQuery(); + } + case ALL_ZERO_TERMS -> { + return new MatchAllDocsQuery(); + } + default -> throw new IllegalArgumentException( + zeroTermsQuery + + " not valid. 
ZeroTermsQuery should be NONE_ZERO_TERMS or ALL_ZERO_TERMS"); + } } return phraseQuery; } diff --git a/src/test/java/com/yelp/nrtsearch/server/grpc/QueryTest.java b/src/test/java/com/yelp/nrtsearch/server/grpc/QueryTest.java index 3a55e4705..32cc6e7cd 100644 --- a/src/test/java/com/yelp/nrtsearch/server/grpc/QueryTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/grpc/QueryTest.java @@ -725,6 +725,48 @@ public void testSearchMatchPhraseQueryEmptyAfterAnalysis() { testQuery(query, responseTester); } + @Test + public void testSearchMatchPhraseQueryZeroTermsIsNone() { + Query query = + Query.newBuilder() + .setMatchPhraseQuery( + MatchPhraseQuery.newBuilder() + .setField("vendor_name") + .setQuery("/?/ ?//?") + .setSlop(1) + .setZeroTermsQuery(MatchPhraseQuery.ZeroTerms.NONE_ZERO_TERMS)) + .build(); + + Consumer<SearchResponse> responseTester = + searchResponse -> { + assertEquals(0, searchResponse.getTotalHits().getValue()); + assertEquals(0, searchResponse.getHitsList().size()); + }; + + testQuery(query, responseTester); + } + + @Test + public void testSearchMatchPhraseQueryZeroTermsIsAll() { + Query query = + Query.newBuilder() + .setMatchPhraseQuery( + MatchPhraseQuery.newBuilder() + .setField("vendor_name") + .setQuery("/?/ ?//?") + .setSlop(1) + .setZeroTermsQuery(MatchPhraseQuery.ZeroTerms.ALL_ZERO_TERMS)) + .build(); + + Consumer<SearchResponse> responseTester = + searchResponse -> { + assertEquals(2, searchResponse.getTotalHits().getValue()); + assertEquals(2, searchResponse.getHitsList().size()); + }; + + testQuery(query, responseTester); + } + @Test public void testSearchMatchPhraseQueryCustomAnalyzer() { Query query =