From b58667c2acc342f5c744beecb65f2652cf4c2c1b Mon Sep 17 00:00:00 2001 From: Andrew Prudhomme Date: Mon, 30 Sep 2024 11:47:08 -0700 Subject: [PATCH] Update to lucene v9.12.0 --- build.gradle | 2 +- .../proto/yelp/nrtsearch/luceneserver.proto | 1 - grpc-gateway/luceneserver.pb.go | 1 - grpc-gateway/luceneserver.swagger.json | 2 +- .../server/luceneserver/ServerCodec.java | 4 ++-- .../field/ContextSuggestFieldDef.java | 4 ++-- .../server/luceneserver/ServerCodecTest.java | 6 +++--- .../field/VectorFieldDefTest.java | 19 ++----------------- .../field/registerFieldsVectorSearch.json | 11 ----------- 9 files changed, 11 insertions(+), 39 deletions(-) diff --git a/build.gradle b/build.gradle index 92a69167d..7d3b00488 100644 --- a/build.gradle +++ b/build.gradle @@ -35,7 +35,7 @@ def _artifactId = 'server' //This is for https://github.com/gradle/gradle/issues/11308 System.setProperty("org.gradle.internal.publish.checksums.insecure", "True") -def luceneVersion = '9.11.1' +def luceneVersion = '9.12.0' project.ext.slf4jVersion = '2.0.16' project.ext.grpcVersion = '1.66.0' project.ext.lz4Version = '1.8.0' diff --git a/clientlib/src/main/proto/yelp/nrtsearch/luceneserver.proto b/clientlib/src/main/proto/yelp/nrtsearch/luceneserver.proto index 99d9b31b1..280940fa0 100644 --- a/clientlib/src/main/proto/yelp/nrtsearch/luceneserver.proto +++ b/clientlib/src/main/proto/yelp/nrtsearch/luceneserver.proto @@ -601,7 +601,6 @@ message VectorIndexingOptions { // The number of bits to use for quantizing the vectors. It can have the following values: // 4 - half byte // 7 - signed byte (default) - // 8 - unsigned byte optional int32 quantized_bits = 6; // Whether to compress the vectors, if true, the vectors that are quantized with <= 4 bits will be compressed into // a single byte. If false, the vectors will be stored as is. This provides a trade-off of memory usage and speed. default: false diff --git a/grpc-gateway/luceneserver.pb.go b/grpc-gateway/luceneserver.pb.go index c326c0982..2d100552f 100644 --- a/grpc-gateway/luceneserver.pb.go +++ b/grpc-gateway/luceneserver.pb.go @@ -1425,7 +1425,6 @@ type VectorIndexingOptions struct { // // 4 - half byte // 7 - signed byte (default) - // 8 - unsigned byte QuantizedBits *int32 `protobuf:"varint,6,opt,name=quantized_bits,json=quantizedBits,proto3,oneof" json:"quantized_bits,omitempty"` // Whether to compress the vectors, if true, the vectors that are quantized with <= 4 bits will be compressed into // a single byte. If false, the vectors will be stored as is. This provides a trade-off of memory usage and speed. default: false diff --git a/grpc-gateway/luceneserver.swagger.json b/grpc-gateway/luceneserver.swagger.json index 2f066be63..f7fcd6565 100644 --- a/grpc-gateway/luceneserver.swagger.json +++ b/grpc-gateway/luceneserver.swagger.json @@ -5404,7 +5404,7 @@ "quantizedBits": { "type": "integer", "format": "int32", - "title": "The number of bits to use for quantizing the vectors. It can have the following values:\n 4 - half byte\n 7 - signed byte (default)\n 8 - unsigned byte" + "title": "The number of bits to use for quantizing the vectors. It can have the following values:\n 4 - half byte\n 7 - signed byte (default)" }, "quantizedCompress": { "type": "boolean", diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/ServerCodec.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/ServerCodec.java index 8cd726918..7a50cfff5 100644 --- a/src/main/java/com/yelp/nrtsearch/server/luceneserver/ServerCodec.java +++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/ServerCodec.java @@ -23,10 +23,10 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.codecs.lucene912.Lucene912Codec; /** Implements per-index {@link Codec}. */ -public class ServerCodec extends Lucene99Codec { +public class ServerCodec extends Lucene912Codec { private final IndexStateManager stateManager; // nocommit expose compression control diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/ContextSuggestFieldDef.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/ContextSuggestFieldDef.java index aa58196b7..4f4cd5b60 100644 --- a/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/ContextSuggestFieldDef.java +++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/ContextSuggestFieldDef.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.document.Document; -import org.apache.lucene.search.suggest.document.Completion99PostingsFormat; +import org.apache.lucene.search.suggest.document.Completion912PostingsFormat; import org.apache.lucene.search.suggest.document.ContextSuggestField; public class ContextSuggestFieldDef extends IndexableFieldDef { @@ -116,6 +116,6 @@ public Optional getSearchAnalyzer() { @Override public PostingsFormat getPostingsFormat() { - return new Completion99PostingsFormat(); + return new Completion912PostingsFormat(); } } diff --git a/src/test/java/com/yelp/nrtsearch/server/luceneserver/ServerCodecTest.java b/src/test/java/com/yelp/nrtsearch/server/luceneserver/ServerCodecTest.java index a1093d815..6b274cf54 100644 --- a/src/test/java/com/yelp/nrtsearch/server/luceneserver/ServerCodecTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/luceneserver/ServerCodecTest.java @@ -32,8 +32,8 @@ import org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsFormat; import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat; import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; +import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; -import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat; import org.junit.BeforeClass; import org.junit.Test; @@ -61,7 +61,7 @@ public void testPostingFormat_default() { when(mockFieldDef.getPostingsFormat()).thenReturn(null); IndexStateManager mockStateManager = getManager(mockFieldDef); ServerCodec serverCodec = new ServerCodec(mockStateManager); - assertTrue(serverCodec.getPostingsFormatForField("field") instanceof Lucene99PostingsFormat); + assertTrue(serverCodec.getPostingsFormatForField("field") instanceof Lucene912PostingsFormat); verify(mockFieldDef, times(1)).getPostingsFormat(); verifyNoMoreInteractions(mockFieldDef); } @@ -97,7 +97,7 @@ public void testPostingFormat_internalField() { IndexStateManager mockStateManager = getManager(mockFieldDef); ServerCodec serverCodec = new ServerCodec(mockStateManager); assertTrue( - serverCodec.getPostingsFormatForField("internal_field") instanceof Lucene99PostingsFormat); + serverCodec.getPostingsFormatForField("internal_field") instanceof Lucene912PostingsFormat); verifyNoInteractions(mockFieldDef); } diff --git a/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDefTest.java b/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDefTest.java index 551144e33..6ea3d7b95 100644 --- a/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDefTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDefTest.java @@ -151,11 +151,6 @@ private void indexVectorSearchDocs() throws Exception { MultiValuedField.newBuilder() .addValue(createVectorString(random, 3, false)) .build()) - .putFields( - "quantized_vector_8", - MultiValuedField.newBuilder() - .addValue(createVectorString(random, 3, false)) - .build()) .putFields( "filter", MultiValuedField.newBuilder().addValue("term" + j % 10).build()) .build()); @@ -460,16 +455,6 @@ public void testQuantizedVectorSearch_7() { 0.001); } - @Test - public void testQuantizedVectorSearch_8() { - singleVectorQueryAndVerify( - "quantized_vector_8", - List.of(0.25f, 0.5f, 0.75f), - VectorSimilarityFunction.EUCLIDEAN, - 1.0f, - 0.001); - } - @Test public void testVectorSearch_boost() { singleVectorQueryAndVerify( @@ -684,7 +669,7 @@ public void testMultipleVectorSearch() { @Test public void testHybridVectorSearch() { - List queryVector = List.of(0.25f, 0.5f, 0.75f); + List queryVector = List.of(0.05f, 0.5f, 0.75f); String field = "vector_cosine"; SearchResponse searchResponse = getGrpcServer() @@ -1376,7 +1361,7 @@ public void testInvalidBits() { new VectorFieldDef("vector", field); fail(); } catch (IllegalArgumentException e) { - assertEquals("bits must be one of: 4, 7, 8; bits=9", e.getMessage()); + assertEquals("bits must be one of: 4, 7; bits=9", e.getMessage()); } } diff --git a/src/test/resources/field/registerFieldsVectorSearch.json b/src/test/resources/field/registerFieldsVectorSearch.json index 56ca9d3fd..593f1bcb5 100644 --- a/src/test/resources/field/registerFieldsVectorSearch.json +++ b/src/test/resources/field/registerFieldsVectorSearch.json @@ -110,17 +110,6 @@ "quantized_bits": 7 } }, - { - "name": "quantized_vector_8", - "type": "VECTOR", - "search": true, - "vectorDimensions": 3, - "vectorSimilarity": "l2_norm", - "vectorIndexingOptions": { - "type": "hnsw_scalar_quantized", - "quantized_bits": 8 - } - }, { "name": "filter", "type": "ATOM",