From 0e517cb819084331b302ffb9bab821ce7bbd39ab Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 13 Feb 2026 15:10:35 -0800 Subject: [PATCH 1/3] add DictionaryEncodedValueIndex.getValueIterator and use it for ExpressionPredicateIndexSupplier changes: * Added `getValueIterator` method to `DictionaryEncodedValueIndex` to give an easy way for consumers to iterate the dictionary values in order * `ExpressionPredicateIndexSupplier` now uses `getValueIterator` to scan the dictionary values, offering a performance improvement, particularly when using front-coding * Fixed a few other places that iterated the dictionary with index-based `get` calls to use an iterator instead --- .../query/SqlExpressionBenchmark.java | 3 ++- .../ExpressionPredicateIndexSupplier.java | 7 +++-- .../druid/query/metadata/SegmentAnalyzer.java | 8 ++++-- .../query/search/UseIndexesStrategy.java | 9 +++++-- .../DictionaryEncodedColumnMerger.java | 4 +-- ...ringDictionaryEncodedStringValueIndex.java | 7 +++++ .../semantic/DictionaryEncodedValueIndex.java | 6 +++++ .../NestedFieldColumnIndexSupplier.java | 27 +++++++++++++++++++ .../ScalarDoubleColumnAndIndexSupplier.java | 23 +++++++++++++++- .../ScalarLongColumnAndIndexSupplier.java | 23 +++++++++++++++- .../virtual/ListFilteredVirtualColumn.java | 21 +++++++++++++++ .../org/apache/druid/cli/DumpSegment.java | 20 +++++++------- 12 files changed, 137 insertions(+), 21 deletions(-) diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index cc9e7b888dad..8988973f9825 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -159,7 +159,8 @@ public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark "SELECT CASE WHEN MOD(long1, 2) = 0 THEN -1 WHEN MOD(long1, 2) = 1 THEN long2 / MOD(long1, 2) 
ELSE long3 END FROM expressions GROUP BY 1", // cast "SELECT CAST(string1 as BIGINT) + CAST(string3 as DOUBLE) + long3, COUNT(*) FROM expressions GROUP BY 1 ORDER BY 2", - "SELECT COUNT(*), SUM(CAST(string1 as BIGINT) + CAST(string3 as BIGINT)) FROM expressions WHERE double3 < 1010.0 AND double3 > 100.0" + "SELECT COUNT(*), SUM(CAST(string1 as BIGINT) + CAST(string3 as BIGINT)) FROM expressions WHERE double3 < 1010.0 AND double3 > 100.0", + "SELECT COUNT(*) FROM expressions WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00' AND (UPPER(COALESCE(string3,'')) LIKE '1%' OR TRIM(UPPER(COALESCE(string3,''))) LIKE '1%' OR SUBSTRING(UPPER(COALESCE(string3,'')),1,1) IN ('1','2','3','4','5') OR ('X' || UPPER(COALESCE(string3,''))) LIKE 'X1%') AND (UPPER(COALESCE(string5,'')) LIKE '2%' OR TRIM(UPPER(COALESCE(string5,''))) LIKE '2%' OR SUBSTRING(UPPER(COALESCE(string5,'')),1,1) IN ('1','2','3','4','5') OR ('Y' || UPPER(COALESCE(string5,''))) LIKE 'Y2%') AND CAST(double4 * 1000 AS BIGINT) BETWEEN -850000000 AND 850000000" ); @Param({ diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java index d2c04770b7f6..a8a9a7629380 100644 --- a/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java @@ -225,13 +225,16 @@ boolean nextMatches(@Nullable Object nextValue) private abstract static class BitmapIterator implements Iterator { private final DictionaryEncodedValueIndex inputColumnIndexes; + int next; int index = 0; boolean nextSet = false; + private final Iterator valuesIterator; private BitmapIterator(DictionaryEncodedValueIndex inputColumnIndexes) { this.inputColumnIndexes = inputColumnIndexes; + this.valuesIterator = inputColumnIndexes.getValueIterator(); } @Override @@ 
-258,8 +261,8 @@ public ImmutableBitmap next() private void findNext() { - while (!nextSet && index < inputColumnIndexes.getCardinality()) { - Object nextValue = inputColumnIndexes.getValue(index); + while (!nextSet && valuesIterator.hasNext()) { + final Object nextValue = valuesIterator.next(); nextSet = nextMatches(nextValue); if (nextSet) { next = index; diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index dd484e9a7070..1ebf6e5cc7fe 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -54,6 +54,7 @@ import javax.annotation.Nullable; import java.io.IOException; import java.util.EnumSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import java.util.Objects; @@ -212,11 +213,14 @@ private ColumnAnalysis analyzeStringColumn( if (valueIndex != null) { cardinality = valueIndex.getCardinality(); if (analyzingSize()) { - for (int i = 0; i < cardinality; ++i) { - String value = valueIndex.getValue(i); + final Iterator valueIterator = valueIndex.getValueIterator(); + int i = 0; + while (valueIterator.hasNext()) { + final String value = valueIterator.next(); if (value != null) { size += StringUtils.estimatedBinaryLengthAsUTF8(value) * ((long) valueIndex.getBitmap(i).size()); } + i++; } } if (analyzingMinMax() && cardinality > 0) { diff --git a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java index 35bfbbb3838a..bd1eafa170e9 100644 --- a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java @@ -51,6 +51,7 @@ import java.util.ArrayList; import java.util.Arrays; import 
java.util.Collections; +import java.util.Iterator; import java.util.List; public class UseIndexesStrategy extends SearchStrategy @@ -305,9 +306,12 @@ public Object2IntRBTreeMap execute(int limit) // these were checked to be non-null in partitionDimensionList final DictionaryEncodedStringValueIndex bitmapIndex = indexSupplier.as(DictionaryEncodedStringValueIndex.class); - for (int i = 0; i < bitmapIndex.getCardinality(); ++i) { - String dimVal = extractionFn.apply(bitmapIndex.getValue(i)); + final Iterator iterator = bitmapIndex.getValueIterator(); + int i = 0; + while (iterator.hasNext()) { + final String dimVal = extractionFn.apply(iterator.next()); if (!searchQuerySpec.accept(dimVal)) { + i++; continue; } ImmutableBitmap bitmap = bitmapIndex.getBitmap(i); @@ -320,6 +324,7 @@ public Object2IntRBTreeMap execute(int limit) return retVal; } } + i++; } } } diff --git a/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java b/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java index 393d54b3e83d..8ea88b580f67 100644 --- a/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java +++ b/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java @@ -581,8 +581,8 @@ protected IndexSeeker[] toIndexSeekers( private boolean allNull(Indexed dimValues) { - for (int i = 0, size = dimValues.size(); i < size; i++) { - if (dimValues.get(i) != null) { + for (T dimValue : dimValues) { + if (dimValue != null) { return false; } } diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java index c3c0c304410c..2b999a01b37d 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java +++ 
b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java @@ -25,6 +25,7 @@ import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import javax.annotation.Nullable; +import java.util.Iterator; public final class IndexedStringDictionaryEncodedStringValueIndex> implements DictionaryEncodedStringValueIndex @@ -63,6 +64,12 @@ public BitmapFactory getBitmapFactory() return bitmapFactory; } + @Override + public Iterator getValueIterator() + { + return dictionary.iterator(); + } + @Override public ImmutableBitmap getBitmap(int idx) { diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java index b60115e4066a..9ef6e2cffcc7 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java @@ -24,6 +24,7 @@ import org.apache.druid.segment.column.DictionaryEncodedColumn; import javax.annotation.Nullable; +import java.util.Iterator; /** * This exposes a 'raw' view into bitmap value indexes for {@link DictionaryEncodedColumn}. 
This allows callers @@ -54,5 +55,10 @@ public interface DictionaryEncodedValueIndex @Nullable T getValue(int index); + /** + * Returns an {@link Iterator} containing all the underlying values of the dictionary in order + */ + Iterator getValueIterator(); + BitmapFactory getBitmapFactory(); } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java index 8b78c76c3434..f7bbc802d290 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java @@ -385,6 +385,33 @@ public BitmapFactory getBitmapFactory() return bitmapFactory; } + @Override + public Iterator getValueIterator() + { + final Iterator localIterator = localDictionary.iterator(); + return new Iterator<>() + { + @Override + public boolean hasNext() + { + return localIterator.hasNext(); + } + + @Override + public String next() + { + int globalIndex = localIterator.next(); + if (globalIndex < adjustLongId) { + return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex)); + } else if (globalIndex < adjustDoubleId) { + return String.valueOf(longDictionary.get(globalIndex - adjustLongId)); + } else { + return String.valueOf(doubleDictionary.get(globalIndex - adjustDoubleId)); + } + } + }; + } + @Override public ImmutableBitmap getBitmap(int idx) { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index ac871372cdaf..81826eadb928 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -34,6 +34,7 @@ import 
org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.BitmapResultFactory; @@ -635,7 +636,7 @@ public int getCardinality() public String getValue(int index) { final Double value = dictionary.get(index); - return value == null ? null : String.valueOf(value); + return Evals.asString(value); } @Override @@ -643,5 +644,25 @@ public BitmapFactory getBitmapFactory() { return bitmapFactory; } + + @Override + public Iterator getValueIterator() + { + final Iterator delegate = dictionary.iterator(); + return new Iterator<>() + { + @Override + public boolean hasNext() + { + return delegate.hasNext(); + } + + @Override + public String next() + { + return Evals.asString(delegate.next()); + } + }; + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index 03ba8e4b4fd0..0818354ed5b7 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -33,6 +33,7 @@ import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.BitmapResultFactory; @@ -646,7 +647,7 @@ public int getCardinality() public String getValue(int index) { final Long value = dictionary.get(index); - return value == null ? 
null : String.valueOf(value); + return Evals.asString(value); } @Override @@ -654,5 +655,25 @@ public BitmapFactory getBitmapFactory() { return bitmapFactory; } + + @Override + public Iterator getValueIterator() + { + final Iterator delegate = dictionary.iterator(); + return new Iterator<>() + { + @Override + public boolean hasNext() + { + return delegate.hasNext(); + } + + @Override + public String next() + { + return Evals.asString(delegate.next()); + } + }; + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java index 271473f481ef..41b58b04a398 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java @@ -720,6 +720,27 @@ public BitmapFactory getBitmapFactory() return delegate.getBitmapFactory(); } + @Override + public Iterator getValueIterator() + { + return new Iterator<>() + { + int position = 0; + + @Override + public boolean hasNext() + { + return position < idMapping.getValueCardinality(); + } + + @Override + public String next() + { + return delegate.getValue(idMapping.getReverseId(position++)); + } + }; + } + @Override public ImmutableBitmap getBitmap(int idx) { diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java b/services/src/main/java/org/apache/druid/cli/DumpSegment.java index 1986fb1ad9ac..a0cd9cc88fb8 100644 --- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java +++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java @@ -86,7 +86,6 @@ import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ConciseBitmapSerdeFactory; import org.apache.druid.segment.data.FixedIndexed; -import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import 
org.apache.druid.segment.file.SegmentFileMapperV10; import org.apache.druid.segment.filter.Filters; @@ -108,6 +107,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.EnumSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -488,21 +488,21 @@ public static void runDumpNestedColumn( } jg.writeEndArray(); - Indexed globalStringDictionary = nestedDataColumn.getUtf8BytesDictionary(); - Indexed globalLongDictionary = nestedDataColumn.getLongDictionary(); - Indexed globalDoubleDictionary = nestedDataColumn.getDoubleDictionary(); + Iterator globalStringIterator = nestedDataColumn.getUtf8BytesDictionary().iterator(); + Iterator globalLongIterator = nestedDataColumn.getLongDictionary().iterator(); + Iterator globalDoubleIterator = nestedDataColumn.getDoubleDictionary().iterator(); jg.writeFieldName("dictionaries"); jg.writeStartObject(); { int globalId = 0; jg.writeFieldName("strings"); jg.writeStartArray(); - for (int i = 0; i < globalStringDictionary.size(); i++, globalId++) { + while (globalStringIterator.hasNext()) { jg.writeStartObject(); jg.writeFieldName("globalId"); jg.writeNumber(globalId); jg.writeFieldName("value"); - final ByteBuffer val = globalStringDictionary.get(i); + final ByteBuffer val = globalStringIterator.next(); if (val == null) { jg.writeNull(); } else { @@ -514,24 +514,24 @@ public static void runDumpNestedColumn( jg.writeFieldName("longs"); jg.writeStartArray(); - for (int i = 0; i < globalLongDictionary.size(); i++, globalId++) { + while (globalLongIterator.hasNext()) { jg.writeStartObject(); jg.writeFieldName("globalId"); jg.writeNumber(globalId); jg.writeFieldName("value"); - jg.writeNumber(globalLongDictionary.get(i)); + jg.writeNumber(globalLongIterator.next()); jg.writeEndObject(); } jg.writeEndArray(); jg.writeFieldName("doubles"); jg.writeStartArray(); - for (int i = 0; i < globalDoubleDictionary.size(); i++, globalId++) { + while (globalDoubleIterator.hasNext()) { 
jg.writeStartObject(); jg.writeFieldName("globalId"); jg.writeNumber(globalId); jg.writeFieldName("value"); - jg.writeNumber(globalDoubleDictionary.get(i)); + jg.writeNumber(globalDoubleIterator.next()); jg.writeEndObject(); } jg.writeEndArray(); From e189af8b88415baec362e9923b4306ed3228eb19 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Fri, 13 Feb 2026 17:25:40 -0800 Subject: [PATCH 2/3] fix test --- .../src/main/java/org/apache/druid/cli/DumpSegment.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java b/services/src/main/java/org/apache/druid/cli/DumpSegment.java index a0cd9cc88fb8..d6b8336b1679 100644 --- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java +++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java @@ -500,7 +500,7 @@ public static void runDumpNestedColumn( while (globalStringIterator.hasNext()) { jg.writeStartObject(); jg.writeFieldName("globalId"); - jg.writeNumber(globalId); + jg.writeNumber(globalId++); jg.writeFieldName("value"); final ByteBuffer val = globalStringIterator.next(); if (val == null) { @@ -517,7 +517,7 @@ public static void runDumpNestedColumn( while (globalLongIterator.hasNext()) { jg.writeStartObject(); jg.writeFieldName("globalId"); - jg.writeNumber(globalId); + jg.writeNumber(globalId++); jg.writeFieldName("value"); jg.writeNumber(globalLongIterator.next()); jg.writeEndObject(); @@ -529,7 +529,7 @@ public static void runDumpNestedColumn( while (globalDoubleIterator.hasNext()) { jg.writeStartObject(); jg.writeFieldName("globalId"); - jg.writeNumber(globalId); + jg.writeNumber(globalId++); jg.writeFieldName("value"); jg.writeNumber(globalDoubleIterator.next()); jg.writeEndObject(); From 76c2f4196028329bd3bf84f1a1da704714ea9690 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 17 Feb 2026 21:29:01 -0800 Subject: [PATCH 3/3] share method --- .../NestedFieldColumnIndexSupplier.java | 35 ++++++++++--------- 1 file 
changed, 18 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java index f7bbc802d290..66835edcb745 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java @@ -352,7 +352,7 @@ protected ImmutableBitmap getUnknownsBitmap() }; } - private class NestedFieldDictionaryEncodedStringValueIndex implements DictionaryEncodedStringValueIndex + private final class NestedFieldDictionaryEncodedStringValueIndex implements DictionaryEncodedStringValueIndex { final FixedIndexed localDictionary = localDictionarySupplier.get(); final Indexed stringDictionary = globalStringDictionarySupplier.get(); @@ -369,14 +369,7 @@ public int getCardinality() @Override public String getValue(int index) { - int globalIndex = localDictionary.get(index); - if (globalIndex < adjustLongId) { - return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex)); - } else if (globalIndex < adjustDoubleId) { - return String.valueOf(longDictionary.get(globalIndex - adjustLongId)); - } else { - return String.valueOf(doubleDictionary.get(globalIndex - adjustDoubleId)); - } + return getStringValueFromGlobalId(localDictionary.get(index)); } @Override @@ -400,14 +393,7 @@ public boolean hasNext() @Override public String next() { - int globalIndex = localIterator.next(); - if (globalIndex < adjustLongId) { - return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex)); - } else if (globalIndex < adjustDoubleId) { - return String.valueOf(longDictionary.get(globalIndex - adjustLongId)); - } else { - return String.valueOf(doubleDictionary.get(globalIndex - adjustDoubleId)); - } + return getStringValueFromGlobalId(localIterator.next()); } }; } @@ -417,6 +403,21 @@ public 
ImmutableBitmap getBitmap(int idx) { return NestedFieldColumnIndexSupplier.this.getBitmap(idx); } + + @Nullable + private String getStringValueFromGlobalId(int globalIndex) + { + if (globalIndex == 0) { + return null; + } + if (globalIndex < adjustLongId) { + return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex)); + } else if (globalIndex < adjustDoubleId) { + return String.valueOf(longDictionary.get(globalIndex - adjustLongId)); + } else { + return String.valueOf(doubleDictionary.get(globalIndex - adjustDoubleId)); + } + } } private class NestedStringValueSetIndexes implements StringValueSetIndexes