From ac610cbe7d212594f1cd2a03898363c34ea5e87c Mon Sep 17 00:00:00 2001 From: zzzxl Date: Mon, 23 Dec 2024 10:24:01 +0800 Subject: [PATCH] [fix](inverted index) Fix the issue with incorrect seek results in DICT_COMPRESS (#45738) Related PR: https://github.com/apache/doris/pull/44414 Problem Summary: In inverted index version 3 mode, using dictionary compression may lead to incorrect results after a seek operation. --- be/src/clucene | 2 +- .../test_inverted_index_v3.out | 12 +++++++++ .../test_inverted_index_v3.groovy | 26 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/be/src/clucene b/be/src/clucene index a506dbb6c523aa..2204eaec46a68e 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit a506dbb6c523aa65044eb1c527a066d236172543 +Subproject commit 2204eaec46a68e5e9a1876b7021f24839ecb2cf0 diff --git a/regression-test/data/inverted_index_p0/test_inverted_index_v3.out b/regression-test/data/inverted_index_p0/test_inverted_index_v3.out index 9dc20f3e0e0a85..53f4eb7ae0a667 100644 --- a/regression-test/data/inverted_index_p0/test_inverted_index_v3.out +++ b/regression-test/data/inverted_index_p0/test_inverted_index_v3.out @@ -23,3 +23,15 @@ -- !sql -- 105 +-- !sql -- +238 + +-- !sql -- +104 + +-- !sql -- +104 + +-- !sql -- +105 + diff --git a/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy b/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy index ea7dd0b595f504..82389d84e3cd67 100644 --- a/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy +++ b/regression-test/suites/inverted_index_p0/test_inverted_index_v3.groovy @@ -19,9 +19,11 @@ suite("test_inverted_index_v3", "p0"){ def indexTbName1 = "test_inverted_index_v3_1" def indexTbName2 = "test_inverted_index_v3_2" + def indexTbName3 = "test_inverted_index_v3_3" sql "DROP TABLE IF EXISTS ${indexTbName1}" sql "DROP TABLE IF EXISTS ${indexTbName2}" + sql "DROP TABLE IF EXISTS ${indexTbName3}" sql """ CREATE TABLE ${indexTbName1} ( @@ -59,6 +61,24 @@ suite("test_inverted_index_v3", "p0"){ ); """ + sql """ + CREATE TABLE ${indexTbName3} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` varchar(20) NULL COMMENT "", + `request` text NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true", "dict_compression" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "inverted_index_storage_format" = "V3" + ); + """ + def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, expected_succ_rows = -1, load_to_single_tablet = 'true' -> @@ -99,6 +119,7 @@ suite("test_inverted_index_v3", "p0"){ try { load_httplogs_data.call(indexTbName1, indexTbName1, 'true', 'json', 'documents-1000.json') load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName3, indexTbName3, 'true', 'json', 'documents-1000.json') sql "sync" @@ -112,6 +133,11 @@ suite("test_inverted_index_v3", "p0"){ qt_sql """ select count() from ${indexTbName2} where request match_phrase 'hm bg'; """ qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'hm bg'; """ + qt_sql """ select count() from ${indexTbName3} where request match_any 'hm bg'; """ + qt_sql """ select count() from ${indexTbName3} where request match_all 'hm bg'; """ + qt_sql """ select count() from ${indexTbName3} where request match_phrase 'hm bg'; """ + qt_sql """ select count() from ${indexTbName3} where request match_phrase_prefix 'hm bg'; """ + } finally { } } \ No newline at end of file