diff --git a/velox/vector/BaseVector.cpp b/velox/vector/BaseVector.cpp
index 5d382aac6d8a6..fdc565960e6ac 100644
--- a/velox/vector/BaseVector.cpp
+++ b/velox/vector/BaseVector.cpp
@@ -828,6 +828,21 @@ uint64_t BaseVector::estimateFlatSize() const {
   return length_ * avgRowSize;
 }
 
+double BaseVector::estimateRowSize() const {
+  // An empty vector has no rows; return zero instead of dividing by zero.
+  if (length_ == 0) {
+    return 0;
+  }
+  // Divide in floating point so that the fractional part of the average is
+  // not truncated.
+  return static_cast<double>(estimateFlatSize()) / length_;
+}
+
+uint64_t BaseVector::estimateCompactSize() const {
+  // Generic fallback for encodings that do not override this method.
+  return estimateFlatSize();
+}
+
 namespace {
 bool isReusableEncoding(VectorEncoding::Simple encoding) {
   return encoding == VectorEncoding::Simple::FLAT ||
diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h
index 0756886b839c9..bf8ccd4bb22cd 100644
--- a/velox/vector/BaseVector.h
+++ b/velox/vector/BaseVector.h
@@ -727,6 +727,18 @@ class BaseVector {
   /// hasn't been loaded yet.
   virtual uint64_t estimateFlatSize() const;
 
+  /// Returns an estimate of the average size of a row in this vector. The
+  /// base implementation returns zero for an empty vector.
+  virtual double estimateRowSize() const;
+
+  /// Returns an estimate of the size of the vector as if it was compacted,
+  /// ignoring any over-allocations. For example:
+  /// (1) For dictionary vector, it only counts each dictionary entry once,
+  ///     rather than each time a value is referenced.
+  /// (2) For flat vector, returns its retained size.
+  /// (3) For lazy vector that hasn't been loaded, returns zero.
+  virtual uint64_t estimateCompactSize() const;
+
   /// To safely reuse a vector one needs to (1) ensure that the vector as well
   /// as all its buffers and child vectors are singly-referenced and mutable
   /// (for buffers); (2) clear append-only string buffers and child vectors
diff --git a/velox/vector/ConstantVector.h b/velox/vector/ConstantVector.h
index 18ff32198f702..b5decf0d8c026 100644
--- a/velox/vector/ConstantVector.h
+++ b/velox/vector/ConstantVector.h
@@ -205,6 +205,15 @@ class ConstantVector final : public SimpleVector<T> {
     return sizeof(T);
   }
 
+  double estimateRowSize() const override {
+    return valueVector_ ? valueVector_->estimateRowSize() : retainedSize();
+  }
+
+  uint64_t estimateCompactSize() const override {
+    // A constant vector holds a single value no matter how many rows it has.
+    return static_cast<uint64_t>(estimateRowSize());
+  }
+
   BaseVector* loadedVector() override {
     if (!valueVector_) {
       return this;
diff --git a/velox/vector/DictionaryVector.h b/velox/vector/DictionaryVector.h
index c08c5569bc965..a3dfb6d4db80d 100644
--- a/velox/vector/DictionaryVector.h
+++ b/velox/vector/DictionaryVector.h
@@ -148,6 +148,32 @@ class DictionaryVector : public SimpleVector<T> {
         indices_->capacity();
   }
 
+  double estimateRowSize() const override {
+    return dictionaryValues_->estimateRowSize();
+  }
+
+  uint64_t estimateCompactSize() const override {
+    if (indices_ == nullptr) {
+      return 0;
+    }
+
+    // Count every dictionary entry referenced by a non-null row exactly once.
+    std::vector<bool> used(dictionaryValues_->size());
+    const vector_size_t* rawIndices = indices_->as<vector_size_t>();
+    vector_size_t numUsed = 0;
+    for (vector_size_t row = 0; row < BaseVector::length_; ++row) {
+      if (BaseVector::isNullAt(row)) {
+        continue;
+      }
+      const auto index = rawIndices[row];
+      if (!used[index]) {
+        used[index] = true;
+        ++numUsed;
+      }
+    }
+    return numUsed * estimateRowSize();
+  }
+
   bool isScalar() const override {
     return dictionaryValues_->isScalar();
   }
diff --git a/velox/vector/FlatVector.h b/velox/vector/FlatVector.h
index b9ca51af67b91..67543603c39b1 100644
--- a/velox/vector/FlatVector.h
+++ b/velox/vector/FlatVector.h
@@ -386,6 +386,17 @@ class FlatVector final : public SimpleVector<T> {
     return size;
   }
 
+  double estimateRowSize() const override {
+    if (BaseVector::length_ == 0) {
+      return 0;
+    }
+    return static_cast<double>(retainedSize()) / BaseVector::length_;
+  }
+
+  uint64_t estimateCompactSize() const override {
+    return retainedSize();
+  }
+
   /**
    * Used for vectors of type VARCHAR and VARBINARY to hold data referenced by
    * StringView's. It is safe to share these among multiple vectors. These
diff --git a/velox/vector/SequenceVector.h b/velox/vector/SequenceVector.h
index 03b25edb6bbc7..f01c83833b377 100644
--- a/velox/vector/SequenceVector.h
+++ b/velox/vector/SequenceVector.h
@@ -153,6 +153,17 @@ class SequenceVector : public SimpleVector<T> {
     return sequenceValues_->retainedSize() + sequenceLengths_->capacity();
   }
 
+  double estimateRowSize() const override {
+    return sequenceValues_->estimateRowSize();
+  }
+
+  uint64_t estimateCompactSize() const override {
+    // sequenceLengths_->size() is in bytes; convert it to the number of runs.
+    // NOTE(review): assumes run lengths are stored as vector_size_t - confirm.
+    const auto numSequences = sequenceLengths_->size() / sizeof(vector_size_t);
+    return numSequences * estimateRowSize();
+  }
+
   bool isScalar() const override {
     return sequenceValues_->isScalar();
   }
diff --git a/velox/vector/tests/VectorEstimateFlatSizeTest.cpp b/velox/vector/tests/VectorEstimateFlatSizeTest.cpp
index 4fa79ea5870d8..c32102871521e 100644
--- a/velox/vector/tests/VectorEstimateFlatSizeTest.cpp
+++ b/velox/vector/tests/VectorEstimateFlatSizeTest.cpp
@@ -18,7 +18,7 @@
 
 using namespace facebook::velox;
 
-class VectorEstimateFlatSizeTest : public testing::Test,
+class VectorEstimateSizeTest : public testing::Test,
                                    public test::VectorTestBase {
  protected:
   using test::VectorTestBase::makeArrayVector;
@@ -71,61 +71,73 @@ StringView shortStringAt(vector_size_t row) {
 };
 } // namespace
 
-TEST_F(VectorEstimateFlatSizeTest, fixedWidthNoNulls) {
+TEST_F(VectorEstimateSizeTest, fixedWidthNoNulls) {
   // Fixed width vectors without nulls.
VectorPtr flat = makeFlatVector(1'000, int16At); EXPECT_EQ(2976, flat->retainedSize()); EXPECT_EQ(2976, flat->estimateFlatSize()); + EXPECT_EQ(2976, flat->estimateCompactSize()); flat = makeFlatVector(1'000, int32At); EXPECT_EQ(4000, flat->retainedSize()); EXPECT_EQ(4000, flat->estimateFlatSize()); + EXPECT_EQ(4000, flat->estimateCompactSize()); flat = makeFlatVector(1'000, int64At); EXPECT_EQ(8096, flat->retainedSize()); EXPECT_EQ(8096, flat->estimateFlatSize()); + EXPECT_EQ(8096, flat->estimateCompactSize()); flat = makeFlatVector(1'000, floatAt); EXPECT_EQ(4000, flat->retainedSize()); EXPECT_EQ(4000, flat->estimateFlatSize()); + EXPECT_EQ(4000, flat->estimateCompactSize()); flat = makeFlatVector(1'000, doubleAt); EXPECT_EQ(8096, flat->retainedSize()); EXPECT_EQ(8096, flat->estimateFlatSize()); + EXPECT_EQ(8096, flat->estimateCompactSize()); flat = makeFlatVector(1'000, boolAt); EXPECT_EQ(160, flat->retainedSize()); EXPECT_EQ(160, flat->estimateFlatSize()); + EXPECT_EQ(160, flat->estimateCompactSize()); } -TEST_F(VectorEstimateFlatSizeTest, fixedWidthWithNulls) { +TEST_F(VectorEstimateSizeTest, fixedWidthWithNulls) { // Fixed width vectors with nulls. Nulls buffer adds a few bytes. 
VectorPtr flat = makeFlatVector(1'000, int16At, nullEvery(5)); EXPECT_EQ(3136, flat->retainedSize()); EXPECT_EQ(3136, flat->estimateFlatSize()); + EXPECT_EQ(3136, flat->estimateCompactSize()); flat = makeFlatVector(1'000, int32At, nullEvery(5)); EXPECT_EQ(4160, flat->retainedSize()); EXPECT_EQ(4160, flat->estimateFlatSize()); + EXPECT_EQ(4160, flat->estimateCompactSize()); flat = makeFlatVector(1'000, int64At, nullEvery(5)); EXPECT_EQ(8256, flat->retainedSize()); EXPECT_EQ(8256, flat->estimateFlatSize()); + EXPECT_EQ(8256, flat->estimateCompactSize()); flat = makeFlatVector(1'000, floatAt, nullEvery(5)); EXPECT_EQ(4160, flat->retainedSize()); EXPECT_EQ(4160, flat->estimateFlatSize()); + EXPECT_EQ(4160, flat->estimateCompactSize()); flat = makeFlatVector(1'000, doubleAt, nullEvery(5)); EXPECT_EQ(8256, flat->retainedSize()); EXPECT_EQ(8256, flat->estimateFlatSize()); + EXPECT_EQ(8256, flat->estimateCompactSize()); flat = makeFlatVector(1'000, boolAt, nullEvery(5)); EXPECT_EQ(320, flat->retainedSize()); EXPECT_EQ(320, flat->estimateFlatSize()); + EXPECT_EQ(320, flat->estimateCompactSize()); } -TEST_F(VectorEstimateFlatSizeTest, dictionaryFixedWidthNoExtraNulls) { +TEST_F(VectorEstimateSizeTest, dictionaryFixedWidthNoExtraNulls) { // Dictionary vector. Indices buffer adds a few bytes. 
auto indices = makeIndices(100, [](auto row) { return row * 2; }); @@ -133,37 +145,44 @@ TEST_F(VectorEstimateFlatSizeTest, dictionaryFixedWidthNoExtraNulls) { return wrapInDictionary(indices, 100, base); }; + // Distinct indices auto dict = makeDict(makeFlatVector(1'000, int16At)); EXPECT_EQ(3392, dict->retainedSize()); EXPECT_EQ(297, dict->estimateFlatSize()); EXPECT_EQ(288, flatten(dict)->retainedSize()); + EXPECT_EQ(297, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, int32At)); EXPECT_EQ(4416, dict->retainedSize()); EXPECT_EQ(400, dict->estimateFlatSize()); EXPECT_EQ(416, flatten(dict)->retainedSize()); + EXPECT_EQ(400, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, int64At)); EXPECT_EQ(8512, dict->retainedSize()); EXPECT_EQ(809, dict->estimateFlatSize()); EXPECT_EQ(928, flatten(dict)->retainedSize()); + EXPECT_EQ(809, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, floatAt)); EXPECT_EQ(4416, dict->retainedSize()); EXPECT_EQ(400, dict->estimateFlatSize()); EXPECT_EQ(416, flatten(dict)->retainedSize()); + EXPECT_EQ(400, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, doubleAt)); EXPECT_EQ(8512, dict->retainedSize()); EXPECT_EQ(809, dict->estimateFlatSize()); EXPECT_EQ(928, flatten(dict)->retainedSize()); + EXPECT_EQ(809, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, boolAt)); EXPECT_EQ(576, dict->retainedSize()); EXPECT_EQ(16, dict->estimateFlatSize()); EXPECT_EQ(32, flatten(dict)->retainedSize()); + EXPECT_EQ(16, dict->estimateCompactSize()); - // 2 levels of dictionary encoding. + // 2 levels of dictionary encoding with distinct indices. 
auto makeDoubleDict = [&](auto base) { return wrapInDictionary(indices, 50, wrapInDictionary(indices, 100, base)); }; @@ -172,34 +191,62 @@ TEST_F(VectorEstimateFlatSizeTest, dictionaryFixedWidthNoExtraNulls) { EXPECT_EQ(3808, dict->retainedSize()); EXPECT_EQ(148, dict->estimateFlatSize()); EXPECT_EQ(160, flatten(dict)->retainedSize()); + EXPECT_EQ(148, dict->estimateCompactSize()); dict = makeDoubleDict(makeFlatVector(1'000, int32At)); EXPECT_EQ(4832, dict->retainedSize()); EXPECT_EQ(200, dict->estimateFlatSize()); EXPECT_EQ(288, flatten(dict)->retainedSize()); + EXPECT_EQ(200, dict->estimateCompactSize()); dict = makeDoubleDict(makeFlatVector(1'000, int64At)); EXPECT_EQ(8928, dict->retainedSize()); EXPECT_EQ(404, dict->estimateFlatSize()); EXPECT_EQ(416, flatten(dict)->retainedSize()); + EXPECT_EQ(404, dict->estimateCompactSize()); dict = makeDoubleDict(makeFlatVector(1'000, floatAt)); EXPECT_EQ(4832, dict->retainedSize()); EXPECT_EQ(200, dict->estimateFlatSize()); EXPECT_EQ(288, flatten(dict)->retainedSize()); + EXPECT_EQ(200, dict->estimateCompactSize()); dict = makeDoubleDict(makeFlatVector(1'000, doubleAt)); EXPECT_EQ(8928, dict->retainedSize()); EXPECT_EQ(404, dict->estimateFlatSize()); EXPECT_EQ(416, flatten(dict)->retainedSize()); + EXPECT_EQ(404, dict->estimateCompactSize()); dict = makeDoubleDict(makeFlatVector(1'000, boolAt)); EXPECT_EQ(992, dict->retainedSize()); EXPECT_EQ(8, dict->estimateFlatSize()); EXPECT_EQ(32, flatten(dict)->retainedSize()); + EXPECT_EQ(8, dict->estimateCompactSize()); + + // duplicate indices + auto duplicateIndices = makeIndices(100, [](auto row) { return 1; }); + auto makeDuplicateDict = [&](auto base) { + return wrapInDictionary(duplicateIndices, 100, base); + }; + dict = makeDuplicateDict(makeFlatVector(1'000, int16At)); + EXPECT_EQ(3392, dict->retainedSize()); + EXPECT_EQ(297, dict->estimateFlatSize()); + EXPECT_EQ(288, flatten(dict)->retainedSize()); + EXPECT_EQ(2, dict->estimateCompactSize()); + + // 2 levels of 
dictionary encoding with duplicate indices. + auto makeDuplicateDoubleDict = [&](auto base) { + return wrapInDictionary(duplicateIndices, 50, wrapInDictionary(duplicateIndices, 100, base)); + }; + + dict = makeDuplicateDoubleDict(makeFlatVector(1'000, int16At)); + EXPECT_EQ(3808, dict->retainedSize()); + EXPECT_EQ(148, dict->estimateFlatSize()); + EXPECT_EQ(160, flatten(dict)->retainedSize()); + EXPECT_EQ(2, dict->estimateCompactSize()); } -TEST_F(VectorEstimateFlatSizeTest, dictionaryFixedWidthExtraNulls) { +TEST_F(VectorEstimateSizeTest, dictionaryFixedWidthExtraNulls) { // Dictionary vector with extra nulls. auto indices = makeIndices(100, [](auto row) { return row * 2; }); @@ -217,31 +264,37 @@ TEST_F(VectorEstimateFlatSizeTest, dictionaryFixedWidthExtraNulls) { EXPECT_EQ(3424, dict->retainedSize()); EXPECT_EQ(297, dict->estimateFlatSize()); EXPECT_EQ(320, flatten(dict)->retainedSize()); + EXPECT_EQ(196, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, int32At)); EXPECT_EQ(4448, dict->retainedSize()); EXPECT_EQ(400, dict->estimateFlatSize()); EXPECT_EQ(448, flatten(dict)->retainedSize()); + EXPECT_EQ(264, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, int64At)); EXPECT_EQ(8544, dict->retainedSize()); EXPECT_EQ(809, dict->estimateFlatSize()); EXPECT_EQ(960, flatten(dict)->retainedSize()); + EXPECT_EQ(534, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, floatAt)); EXPECT_EQ(4448, dict->retainedSize()); EXPECT_EQ(400, dict->estimateFlatSize()); EXPECT_EQ(448, flatten(dict)->retainedSize()); + EXPECT_EQ(264, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, doubleAt)); EXPECT_EQ(8544, dict->retainedSize()); EXPECT_EQ(809, dict->estimateFlatSize()); EXPECT_EQ(960, flatten(dict)->retainedSize()); + EXPECT_EQ(534, dict->estimateCompactSize()); dict = makeDict(makeFlatVector(1'000, boolAt)); EXPECT_EQ(608, dict->retainedSize()); EXPECT_EQ(16, dict->estimateFlatSize()); EXPECT_EQ(64, 
flatten(dict)->retainedSize()); + EXPECT_EQ(10, dict->estimateCompactSize()); // Dictionary vector with all nulls over an empty flat vector. auto indicesAllZero = makeIndices(100, [](auto /*row*/) { return 0; }); @@ -261,23 +314,27 @@ TEST_F(VectorEstimateFlatSizeTest, dictionaryFixedWidthExtraNulls) { EXPECT_EQ(448, dict->retainedSize()); EXPECT_EQ(232, dict->estimateFlatSize()); EXPECT_EQ(320, flatten(dict)->retainedSize()); + EXPECT_EQ(0, dict->estimateCompactSize()); dict = makeDictOverEmpty(makeFlatVector(0, doubleAt)); EXPECT_EQ(448, dict->retainedSize()); EXPECT_EQ(832, dict->estimateFlatSize()); EXPECT_EQ(960, flatten(dict)->retainedSize()); + EXPECT_EQ(0, dict->estimateCompactSize()); } -TEST_F(VectorEstimateFlatSizeTest, flatStrings) { +TEST_F(VectorEstimateSizeTest, flatStrings) { // Inlined strings. auto flat = makeFlatVector(1'000, shortStringAt); EXPECT_EQ(16288, flat->retainedSize()); EXPECT_EQ(16288, flat->estimateFlatSize()); + EXPECT_EQ(16288, flat->estimateCompactSize()); // Inlined strings with nulls. flat = makeFlatVector(1'000, shortStringAt, nullEvery(5)); EXPECT_EQ(16448, flat->retainedSize()); EXPECT_EQ(16448, flat->estimateFlatSize()); + EXPECT_EQ(16448, flat->estimateCompactSize()); // Non-inlined strings. auto longStringAt = [&](auto row) { @@ -286,14 +343,16 @@ TEST_F(VectorEstimateFlatSizeTest, flatStrings) { flat = makeFlatVector(1'000, longStringAt); EXPECT_EQ(65344, flat->retainedSize()); EXPECT_EQ(65343, flat->estimateFlatSize()); + EXPECT_EQ(65344, flat->estimateCompactSize()); flat = makeFlatVector(1'000, longStringAt, nullEvery(5)); EXPECT_EQ(65504, flat->retainedSize()); EXPECT_EQ(65504, flat->estimateFlatSize()); + EXPECT_EQ(65504, flat->estimateCompactSize()); } -TEST_F(VectorEstimateFlatSizeTest, dictionaryShortStrings) { - // Inlined strings. +TEST_F(VectorEstimateSizeTest, dictionaryShortStrings) { + // Inlined strings. 
(Distinct indices) auto indices = makeIndices(100, [](auto row) { return row * 2; }); auto makeDict = [&](auto base) { @@ -303,18 +362,40 @@ TEST_F(VectorEstimateFlatSizeTest, dictionaryShortStrings) { auto dict = makeDict(makeFlatVector(1'000, shortStringAt)); EXPECT_EQ(16704, dict->retainedSize()); EXPECT_EQ(1628, dict->estimateFlatSize()); + EXPECT_EQ(1628, dict->estimateCompactSize()); EXPECT_EQ(1952, flatten(dict)->retainedSize()); - // Inlined strings with nulls. + // Inlined strings with nulls. (Distinct indices) dict = makeDict(makeFlatVector(1'000, shortStringAt, nullEvery(5))); EXPECT_EQ(16864, dict->retainedSize()); EXPECT_EQ(1644, dict->estimateFlatSize()); + EXPECT_EQ(1644, dict->estimateCompactSize()); + EXPECT_EQ(1984, flatten(dict)->retainedSize()); + + // Inlined strings. (Duplicate indices) + auto duplicateIndices = makeIndices(100, [](auto row) { return 1; }); + auto makeDuplicateDict = [&](auto base) { + return wrapInDictionary(duplicateIndices, 100, base); + }; + + dict = makeDuplicateDict(makeFlatVector(1'000, shortStringAt)); + EXPECT_EQ(16704, dict->retainedSize()); + EXPECT_EQ(1628, dict->estimateFlatSize()); + EXPECT_EQ(16, dict->estimateCompactSize()); + EXPECT_EQ(1952, flatten(dict)->retainedSize()); + + // Inlined strings with nulls. (Duplicate indices) + dict = + makeDuplicateDict(makeFlatVector(1'000, shortStringAt, nullEvery(5))); + EXPECT_EQ(16864, dict->retainedSize()); + EXPECT_EQ(1644, dict->estimateFlatSize()); + EXPECT_EQ(16, dict->estimateCompactSize()); EXPECT_EQ(1984, flatten(dict)->retainedSize()); } -TEST_F(VectorEstimateFlatSizeTest, dictionaryLongStrings) { - // Non-inlined strings. +TEST_F(VectorEstimateSizeTest, dictionaryLongStrings) { + // Non-inlined strings. 
(Distinct indices) auto indices = makeIndices(100, [](auto row) { return row * 2; }); auto makeDict = [&](auto base) { @@ -328,25 +409,119 @@ TEST_F(VectorEstimateFlatSizeTest, dictionaryLongStrings) { auto dict = makeDict(makeFlatVector(1'000, longStringAt)); EXPECT_EQ(65760, dict->retainedSize()); EXPECT_EQ(6534, dict->estimateFlatSize()); + EXPECT_EQ(6534, dict->estimateCompactSize()); // Flatten() method uses BaseVector::copy() which doesn't copy the strings, // but rather copies the shared pointer to the string buffers of the source // vector. Hence, the size of the "flattened" vector includes the size of the // original string buffers. EXPECT_EQ(51008, flatten(dict)->retainedSize()); - // Non-inlined strings with nulls. + // Non-inlined strings with nulls. (Distinct indices) dict = makeDict(makeFlatVector(1'000, longStringAt, nullEvery(5))); EXPECT_EQ(65920, dict->retainedSize()); EXPECT_EQ(6550, dict->estimateFlatSize()); + EXPECT_EQ(6550, dict->estimateCompactSize()); // Flatten() method uses BaseVector::copy() which doesn't copy the strings, // but rather copies the shared pointer to the string buffers of the source // vector. Hence, the size of the "flattened" vector includes the size of the // original string buffers. EXPECT_EQ(51040, flatten(dict)->retainedSize()); + + // Non-inlined strings. (Duplicate indices) + auto duplicateIndices = makeIndices(100, [](auto row) { return 1; }); + + auto makeDuplicateDict = [&](auto base) { + return wrapInDictionary(duplicateIndices, 100, base); + }; + dict = makeDuplicateDict(makeFlatVector(1'000, longStringAt)); + EXPECT_EQ(65760, dict->retainedSize()); + EXPECT_EQ(6534, dict->estimateFlatSize()); + EXPECT_EQ(65, dict->estimateCompactSize()); + EXPECT_EQ(51008, flatten(dict)->retainedSize()); + + // Non-inlined strings with nulls. 
(Duplicate indices) + dict = makeDuplicateDict(makeFlatVector(1'000, longStringAt, nullEvery(5))); + EXPECT_EQ(65920, dict->retainedSize()); + EXPECT_EQ(6550, dict->estimateFlatSize()); + EXPECT_EQ(65, dict->estimateCompactSize()); + EXPECT_EQ(51040, flatten(dict)->retainedSize()); +} + +TEST_F(VectorEstimateSizeTest, constant) { + // Constant with scalar type + VectorPtr constant = makeConstant(10, 1'000); + EXPECT_EQ(2, constant->retainedSize()); + EXPECT_EQ(2976, flatten(constant)->retainedSize()); + EXPECT_EQ(2, constant->estimateFlatSize()); + EXPECT_EQ(2, constant->estimateCompactSize()); + + constant = makeConstant(10, 1'000); + EXPECT_EQ(4, constant->retainedSize()); + EXPECT_EQ(4000, flatten(constant)->retainedSize()); + EXPECT_EQ(4, constant->estimateFlatSize()); + EXPECT_EQ(4, constant->estimateCompactSize()); + + constant = makeConstant(10, 1'000); + EXPECT_EQ(8, constant->retainedSize()); + EXPECT_EQ(8096, flatten(constant)->retainedSize()); + EXPECT_EQ(8, constant->estimateFlatSize()); + EXPECT_EQ(8, constant->estimateCompactSize()); + + constant = makeConstant(10, 1'000); + EXPECT_EQ(4, constant->retainedSize()); + EXPECT_EQ(4000, flatten(constant)->retainedSize()); + EXPECT_EQ(4, constant->estimateFlatSize()); + EXPECT_EQ(4, constant->estimateCompactSize()); + + constant = makeConstant(10, 1'000); + EXPECT_EQ(8, constant->retainedSize()); + EXPECT_EQ(8096, flatten(constant)->retainedSize()); + EXPECT_EQ(8, constant->estimateFlatSize()); + EXPECT_EQ(8, constant->estimateCompactSize()); + + constant = makeConstant(true, 1'000); + EXPECT_EQ(1, constant->retainedSize()); + EXPECT_EQ(160, flatten(constant)->retainedSize()); + EXPECT_EQ(1, constant->estimateFlatSize()); + EXPECT_EQ(1, constant->estimateCompactSize()); + + std::string s(25, 'x'); + constant = makeConstant(StringView(s), 1'000); + EXPECT_EQ(32, constant->retainedSize()); + EXPECT_EQ(16320, flatten(constant)->retainedSize()); + EXPECT_EQ(32, constant->estimateFlatSize()); + EXPECT_EQ(32, 
constant->estimateCompactSize()); + + // Constant with complex type + constant = makeConstantArray(1'000, {1, 2, 3}); + EXPECT_EQ(96, constant->retainedSize()); + EXPECT_EQ(32480, flatten(constant)->retainedSize()); + EXPECT_EQ(96000, constant->estimateFlatSize()); + EXPECT_EQ(96, constant->estimateCompactSize()); + + auto rowType = ROW({INTEGER(), VARCHAR()}); + constant = makeConstantRow(rowType, variant::row({1, "hello"}), 1'000); + EXPECT_EQ(64, constant->retainedSize()); + EXPECT_EQ(20288, flatten(constant)->retainedSize()); + EXPECT_EQ(64000, constant->estimateFlatSize()); + EXPECT_EQ(64, constant->estimateCompactSize()); + + auto keys = makeFlatVector(2, int16At); + auto values = makeFlatVector(2, int16At); + constant = makeConstantMap(1'000, keys, values); + EXPECT_EQ(128, constant->retainedSize()); + EXPECT_EQ(24192, flatten(constant)->retainedSize()); + EXPECT_EQ(128000, constant->estimateFlatSize()); + EXPECT_EQ(128, constant->estimateCompactSize()); + +} + +// TODO +TEST_F(VectorEstimateSizeTest, sequence) { } -TEST_F(VectorEstimateFlatSizeTest, arrayOfInts) { +TEST_F(VectorEstimateSizeTest, arrayOfInts) { // Flat array. auto array = makeArrayVector( 1'000, @@ -358,6 +533,7 @@ TEST_F(VectorEstimateFlatSizeTest, arrayOfInts) { EXPECT_EQ(4000, elements->retainedSize()); EXPECT_EQ(12000, array->retainedSize()); EXPECT_EQ(12000, array->estimateFlatSize()); + EXPECT_EQ(12000, array->estimateCompactSize()); // Dictionary-encoded array. auto indices = makeIndices(100, [](auto row) { return row * 2; }); @@ -368,6 +544,7 @@ TEST_F(VectorEstimateFlatSizeTest, arrayOfInts) { EXPECT_EQ(12416, makeDict(array)->retainedSize()); EXPECT_EQ(1200, makeDict(array)->estimateFlatSize()); + EXPECT_EQ(1200, makeDict(array)->estimateCompactSize()); EXPECT_EQ(1248, flatten(makeDict(array))->estimateFlatSize()); // Flat array with dictionary encoded elements. 
@@ -378,10 +555,11 @@ TEST_F(VectorEstimateFlatSizeTest, arrayOfInts) {
       ARRAY(INTEGER()), 100, offsets, lengths, makeDict(elements));
   EXPECT_EQ(5248, array->retainedSize());
   EXPECT_EQ(1232, array->estimateFlatSize());
+  EXPECT_EQ(1232, array->estimateCompactSize());
   EXPECT_EQ(1248, flatten(array)->estimateFlatSize());
 }
 
-TEST_F(VectorEstimateFlatSizeTest, arrayOfShortStrings) {
+TEST_F(VectorEstimateSizeTest, arrayOfShortStrings) {
   // Flat array.
   auto array = makeArrayVector<StringView>(
       1'000,
@@ -393,6 +571,7 @@ TEST_F(VectorEstimateFlatSizeTest, arrayOfShortStrings) {
   EXPECT_EQ(16288, elements->retainedSize());
   EXPECT_EQ(24288, array->retainedSize());
   EXPECT_EQ(24288, array->estimateFlatSize());
+  EXPECT_EQ(24288, array->estimateCompactSize());
 
   // Dictionary-encoded array.
auto indices = makeIndices(100, [](auto row) { return row * 2; }); @@ -450,11 +631,12 @@ TEST_F(VectorEstimateFlatSizeTest, arrayOfLongStrings) { ARRAY(VARCHAR()), 100, offsets, lengths, makeDict(elements)); EXPECT_EQ(66592, array->retainedSize()); EXPECT_EQ(7366, array->estimateFlatSize()); + EXPECT_EQ(7366, array->estimateCompactSize()); // Flattened vector includes the original string buffers. EXPECT_EQ(51840, flatten(array)->estimateFlatSize()); } -TEST_F(VectorEstimateFlatSizeTest, mapOfInts) { +TEST_F(VectorEstimateSizeTest, mapOfInts) { // Flat map. auto map = makeMapVector( 1'000, @@ -469,6 +651,7 @@ TEST_F(VectorEstimateFlatSizeTest, mapOfInts) { EXPECT_EQ(8096, values->retainedSize()); EXPECT_EQ(20096, map->retainedSize()); EXPECT_EQ(20096, map->estimateFlatSize()); + EXPECT_EQ(20096, map->estimateCompactSize()); // Dictionary-encoded map. auto indices = makeIndices(100, [](auto row) { return row * 2; }); @@ -496,10 +679,11 @@ TEST_F(VectorEstimateFlatSizeTest, mapOfInts) { makeDict(values)); EXPECT_EQ(13760, map->retainedSize()); EXPECT_EQ(2041, map->estimateFlatSize()); + EXPECT_EQ(2041, map->estimateCompactSize()); EXPECT_EQ(2175, flatten(map)->estimateFlatSize()); } -TEST_F(VectorEstimateFlatSizeTest, structs) { +TEST_F(VectorEstimateSizeTest, structs) { // Flat struct. auto row = makeRowVector({ makeFlatVector(1'000, int32At), @@ -512,6 +696,7 @@ TEST_F(VectorEstimateFlatSizeTest, structs) { EXPECT_EQ(16288, row->childAt(2)->retainedSize()); EXPECT_EQ(28384, row->retainedSize()); EXPECT_EQ(28384, row->estimateFlatSize()); + EXPECT_EQ(28384, row->estimateCompactSize()); // Dictionary-encoded struct. 
auto indices = makeIndices(100, [](auto row) { return row * 2; }); @@ -531,5 +716,6 @@ TEST_F(VectorEstimateFlatSizeTest, structs) { makeDict(row->childAt(2))}); EXPECT_EQ(29632, row->retainedSize()); EXPECT_EQ(2837, row->estimateFlatSize()); + EXPECT_EQ(2837, row->estimateCompactSize()); EXPECT_EQ(3295, flatten(row)->estimateFlatSize()); } diff --git a/velox/vector/tests/utils/VectorTestBase.h b/velox/vector/tests/utils/VectorTestBase.h index 6f99120cb87b0..de0b06c043b69 100644 --- a/velox/vector/tests/utils/VectorTestBase.h +++ b/velox/vector/tests/utils/VectorTestBase.h @@ -675,6 +675,15 @@ class VectorTestBase { size, 0, vectorMaker_.arrayVector({data})); } + /// Create constant vector of type MAP from keys and values. + VectorPtr makeConstantMap( + vector_size_t size, + const VectorPtr& keys, + const VectorPtr& values) { + return BaseVector::wrapInConstant( + size, 0, vectorMaker_.mapVector({0}, keys, values)); + } + VectorPtr makeNullConstant(TypeKind typeKind, vector_size_t size) { return BaseVector::createNullConstant( createType(typeKind, {}), size, pool());