diff --git a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java index cc9e7b888dad..8988973f9825 100644 --- a/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java +++ b/benchmarks/src/test/java/org/apache/druid/benchmark/query/SqlExpressionBenchmark.java @@ -159,7 +159,8 @@ public class SqlExpressionBenchmark extends SqlBaseQueryBenchmark "SELECT CASE WHEN MOD(long1, 2) = 0 THEN -1 WHEN MOD(long1, 2) = 1 THEN long2 / MOD(long1, 2) ELSE long3 END FROM expressions GROUP BY 1", // cast "SELECT CAST(string1 as BIGINT) + CAST(string3 as DOUBLE) + long3, COUNT(*) FROM expressions GROUP BY 1 ORDER BY 2", - "SELECT COUNT(*), SUM(CAST(string1 as BIGINT) + CAST(string3 as BIGINT)) FROM expressions WHERE double3 < 1010.0 AND double3 > 100.0" + "SELECT COUNT(*), SUM(CAST(string1 as BIGINT) + CAST(string3 as BIGINT)) FROM expressions WHERE double3 < 1010.0 AND double3 > 100.0", + "SELECT COUNT(*) FROM expressions WHERE __time >= TIMESTAMP '2000-01-01 00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00' AND (UPPER(COALESCE(string3,'')) LIKE '1%' OR TRIM(UPPER(COALESCE(string3,''))) LIKE '1%' OR SUBSTRING(UPPER(COALESCE(string3,'')),1,1) IN ('1','2','3','4','5') OR ('X' || UPPER(COALESCE(string3,''))) LIKE 'X1%') AND (UPPER(COALESCE(string5,'')) LIKE '2%' OR TRIM(UPPER(COALESCE(string5,''))) LIKE '2%' OR SUBSTRING(UPPER(COALESCE(string5,'')),1,1) IN ('1','2','3','4','5') OR ('Y' || UPPER(COALESCE(string5,''))) LIKE 'Y2%') AND CAST(double4 * 1000 AS BIGINT) BETWEEN -850000000 AND 850000000" ); @Param({ diff --git a/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java index d2c04770b7f6..a8a9a7629380 100644 --- 
a/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/math/expr/ExpressionPredicateIndexSupplier.java @@ -225,13 +225,16 @@ boolean nextMatches(@Nullable Object nextValue) private abstract static class BitmapIterator implements Iterator { private final DictionaryEncodedValueIndex inputColumnIndexes; + int next; int index = 0; boolean nextSet = false; + private final Iterator valuesIterator; private BitmapIterator(DictionaryEncodedValueIndex inputColumnIndexes) { this.inputColumnIndexes = inputColumnIndexes; + this.valuesIterator = inputColumnIndexes.getValueIterator(); } @Override @@ -258,8 +261,8 @@ public ImmutableBitmap next() private void findNext() { - while (!nextSet && index < inputColumnIndexes.getCardinality()) { - Object nextValue = inputColumnIndexes.getValue(index); + while (!nextSet && valuesIterator.hasNext()) { + final Object nextValue = valuesIterator.next(); nextSet = nextMatches(nextValue); if (nextSet) { next = index; diff --git a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java index dd484e9a7070..1ebf6e5cc7fe 100644 --- a/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java +++ b/processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java @@ -54,6 +54,7 @@ import javax.annotation.Nullable; import java.io.IOException; import java.util.EnumSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import java.util.Objects; @@ -212,11 +213,14 @@ private ColumnAnalysis analyzeStringColumn( if (valueIndex != null) { cardinality = valueIndex.getCardinality(); if (analyzingSize()) { - for (int i = 0; i < cardinality; ++i) { - String value = valueIndex.getValue(i); + final Iterator valueIterator = valueIndex.getValueIterator(); + int i = 0; + while (valueIterator.hasNext()) { + final String 
value = valueIterator.next(); if (value != null) { size += StringUtils.estimatedBinaryLengthAsUTF8(value) * ((long) valueIndex.getBitmap(i).size()); } + i++; } } if (analyzingMinMax() && cardinality > 0) { diff --git a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java index 35bfbbb3838a..bd1eafa170e9 100644 --- a/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java +++ b/processing/src/main/java/org/apache/druid/query/search/UseIndexesStrategy.java @@ -51,6 +51,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Iterator; import java.util.List; public class UseIndexesStrategy extends SearchStrategy @@ -305,9 +306,12 @@ public Object2IntRBTreeMap execute(int limit) // these were checked to be non-null in partitionDimensionList final DictionaryEncodedStringValueIndex bitmapIndex = indexSupplier.as(DictionaryEncodedStringValueIndex.class); - for (int i = 0; i < bitmapIndex.getCardinality(); ++i) { - String dimVal = extractionFn.apply(bitmapIndex.getValue(i)); + final Iterator iterator = bitmapIndex.getValueIterator(); + int i = 0; + while (iterator.hasNext()) { + final String dimVal = extractionFn.apply(iterator.next()); if (!searchQuerySpec.accept(dimVal)) { + i++; continue; } ImmutableBitmap bitmap = bitmapIndex.getBitmap(i); @@ -320,6 +324,7 @@ public Object2IntRBTreeMap execute(int limit) return retVal; } } + i++; } } } diff --git a/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java b/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java index 393d54b3e83d..8ea88b580f67 100644 --- a/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java +++ b/processing/src/main/java/org/apache/druid/segment/DictionaryEncodedColumnMerger.java @@ -581,8 +581,8 @@ protected IndexSeeker[] toIndexSeekers( 
private boolean allNull(Indexed dimValues) { - for (int i = 0, size = dimValues.size(); i < size; i++) { - if (dimValues.get(i) != null) { + for (T dimValue : dimValues) { + if (dimValue != null) { return false; } } diff --git a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java index c3c0c304410c..2b999a01b37d 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/IndexedStringDictionaryEncodedStringValueIndex.java @@ -25,6 +25,7 @@ import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex; import javax.annotation.Nullable; +import java.util.Iterator; public final class IndexedStringDictionaryEncodedStringValueIndex> implements DictionaryEncodedStringValueIndex @@ -63,6 +64,12 @@ public BitmapFactory getBitmapFactory() return bitmapFactory; } + @Override + public Iterator getValueIterator() + { + return dictionary.iterator(); + } + @Override public ImmutableBitmap getBitmap(int idx) { diff --git a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java index b60115e4066a..9ef6e2cffcc7 100644 --- a/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/index/semantic/DictionaryEncodedValueIndex.java @@ -24,6 +24,7 @@ import org.apache.druid.segment.column.DictionaryEncodedColumn; import javax.annotation.Nullable; +import java.util.Iterator; /** * This exposes a 'raw' view into bitmap value indexes for {@link DictionaryEncodedColumn}. 
This allows callers @@ -54,5 +55,10 @@ public interface DictionaryEncodedValueIndex @Nullable T getValue(int index); + /** + * Returns an {@link Iterator} over all dictionary values in index order; the n-th element equals {@code getValue(n)} + */ + Iterator getValueIterator(); + BitmapFactory getBitmapFactory(); } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java index 8b78c76c3434..66835edcb745 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldColumnIndexSupplier.java @@ -352,7 +352,7 @@ protected ImmutableBitmap getUnknownsBitmap() }; } - private class NestedFieldDictionaryEncodedStringValueIndex implements DictionaryEncodedStringValueIndex + private final class NestedFieldDictionaryEncodedStringValueIndex implements DictionaryEncodedStringValueIndex { final FixedIndexed localDictionary = localDictionarySupplier.get(); final Indexed stringDictionary = globalStringDictionarySupplier.get(); @@ -369,14 +369,7 @@ public int getCardinality() @Override public String getValue(int index) { - int globalIndex = localDictionary.get(index); - if (globalIndex < adjustLongId) { - return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex)); - } else if (globalIndex < adjustDoubleId) { - return String.valueOf(longDictionary.get(globalIndex - adjustLongId)); - } else { - return String.valueOf(doubleDictionary.get(globalIndex - adjustDoubleId)); - } + return getStringValueFromGlobalId(localDictionary.get(index)); } @Override @@ -385,11 +378,46 @@ public BitmapFactory getBitmapFactory() return bitmapFactory; } + @Override + public Iterator getValueIterator() + { + final Iterator localIterator = localDictionary.iterator(); + return new Iterator<>() + { + @Override + public boolean hasNext() + { + return 
localIterator.hasNext(); + } + + @Override + public String next() + { + return getStringValueFromGlobalId(localIterator.next()); + } + }; + } + @Override public ImmutableBitmap getBitmap(int idx) { return NestedFieldColumnIndexSupplier.this.getBitmap(idx); } + + @Nullable + private String getStringValueFromGlobalId(int globalIndex) + { + if (globalIndex == 0) { + return null; + } + if (globalIndex < adjustLongId) { + return StringUtils.fromUtf8Nullable(stringDictionary.get(globalIndex)); + } else if (globalIndex < adjustDoubleId) { + return String.valueOf(longDictionary.get(globalIndex - adjustLongId)); + } else { + return String.valueOf(doubleDictionary.get(globalIndex - adjustDoubleId)); + } + } } private class NestedStringValueSetIndexes implements StringValueSetIndexes diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java index ac871372cdaf..81826eadb928 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarDoubleColumnAndIndexSupplier.java @@ -34,6 +34,7 @@ import org.apache.druid.collections.bitmap.ImmutableBitmap; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.BitmapResultFactory; @@ -635,7 +636,7 @@ public int getCardinality() public String getValue(int index) { final Double value = dictionary.get(index); - return value == null ? 
null : String.valueOf(value); + return Evals.asString(value); } @Override @@ -643,5 +644,25 @@ public BitmapFactory getBitmapFactory() { return bitmapFactory; } + + @Override + public Iterator getValueIterator() + { + final Iterator delegate = dictionary.iterator(); + return new Iterator<>() + { + @Override + public boolean hasNext() + { + return delegate.hasNext(); + } + + @Override + public String next() + { + return Evals.asString(delegate.next()); + } + }; + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java index 03ba8e4b4fd0..0818354ed5b7 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/ScalarLongColumnAndIndexSupplier.java @@ -33,6 +33,7 @@ import org.apache.druid.common.guava.GuavaUtils; import org.apache.druid.java.util.common.RE; import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.math.expr.Evals; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.BitmapResultFactory; @@ -646,7 +647,7 @@ public int getCardinality() public String getValue(int index) { final Long value = dictionary.get(index); - return value == null ? 
null : String.valueOf(value); + return Evals.asString(value); } @Override @@ -654,5 +655,25 @@ public BitmapFactory getBitmapFactory() { return bitmapFactory; } + + @Override + public Iterator getValueIterator() + { + final Iterator delegate = dictionary.iterator(); + return new Iterator<>() + { + @Override + public boolean hasNext() + { + return delegate.hasNext(); + } + + @Override + public String next() + { + return Evals.asString(delegate.next()); + } + }; + } } } diff --git a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java index 271473f481ef..41b58b04a398 100644 --- a/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/virtual/ListFilteredVirtualColumn.java @@ -720,6 +720,27 @@ public BitmapFactory getBitmapFactory() return delegate.getBitmapFactory(); } + @Override + public Iterator getValueIterator() + { + return new Iterator<>() + { + int position = 0; + + @Override + public boolean hasNext() + { + return position < idMapping.getValueCardinality(); + } + + @Override + public String next() + { + return delegate.getValue(idMapping.getReverseId(position++)); + } + }; + } + @Override public ImmutableBitmap getBitmap(int idx) { diff --git a/services/src/main/java/org/apache/druid/cli/DumpSegment.java b/services/src/main/java/org/apache/druid/cli/DumpSegment.java index 1986fb1ad9ac..d6b8336b1679 100644 --- a/services/src/main/java/org/apache/druid/cli/DumpSegment.java +++ b/services/src/main/java/org/apache/druid/cli/DumpSegment.java @@ -86,7 +86,6 @@ import org.apache.druid.segment.data.BitmapSerdeFactory; import org.apache.druid.segment.data.ConciseBitmapSerdeFactory; import org.apache.druid.segment.data.FixedIndexed; -import org.apache.druid.segment.data.Indexed; import org.apache.druid.segment.data.RoaringBitmapSerdeFactory; import 
org.apache.druid.segment.file.SegmentFileMapperV10; import org.apache.druid.segment.filter.Filters; @@ -108,6 +107,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.EnumSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -488,21 +488,21 @@ public static void runDumpNestedColumn( } jg.writeEndArray(); - Indexed globalStringDictionary = nestedDataColumn.getUtf8BytesDictionary(); - Indexed globalLongDictionary = nestedDataColumn.getLongDictionary(); - Indexed globalDoubleDictionary = nestedDataColumn.getDoubleDictionary(); + Iterator globalStringIterator = nestedDataColumn.getUtf8BytesDictionary().iterator(); + Iterator globalLongIterator = nestedDataColumn.getLongDictionary().iterator(); + Iterator globalDoubleIterator = nestedDataColumn.getDoubleDictionary().iterator(); jg.writeFieldName("dictionaries"); jg.writeStartObject(); { int globalId = 0; jg.writeFieldName("strings"); jg.writeStartArray(); - for (int i = 0; i < globalStringDictionary.size(); i++, globalId++) { + while (globalStringIterator.hasNext()) { jg.writeStartObject(); jg.writeFieldName("globalId"); - jg.writeNumber(globalId); + jg.writeNumber(globalId++); jg.writeFieldName("value"); - final ByteBuffer val = globalStringDictionary.get(i); + final ByteBuffer val = globalStringIterator.next(); if (val == null) { jg.writeNull(); } else { @@ -514,24 +514,24 @@ public static void runDumpNestedColumn( jg.writeFieldName("longs"); jg.writeStartArray(); - for (int i = 0; i < globalLongDictionary.size(); i++, globalId++) { + while (globalLongIterator.hasNext()) { jg.writeStartObject(); jg.writeFieldName("globalId"); - jg.writeNumber(globalId); + jg.writeNumber(globalId++); jg.writeFieldName("value"); - jg.writeNumber(globalLongDictionary.get(i)); + jg.writeNumber(globalLongIterator.next()); jg.writeEndObject(); } jg.writeEndArray(); jg.writeFieldName("doubles"); jg.writeStartArray(); - for (int i = 0; i < 
globalDoubleDictionary.size(); i++, globalId++) { + while (globalDoubleIterator.hasNext()) { jg.writeStartObject(); jg.writeFieldName("globalId"); - jg.writeNumber(globalId); + jg.writeNumber(globalId++); jg.writeFieldName("value"); - jg.writeNumber(globalDoubleDictionary.get(i)); + jg.writeNumber(globalDoubleIterator.next()); jg.writeEndObject(); } jg.writeEndArray();