Skip to content

Commit

Permalink
Add estimateCompactSize
Browse files Browse the repository at this point in the history
  • Loading branch information
kewang1024 committed Apr 1, 2024
1 parent 865ba33 commit 12e1fe8
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 15 deletions.
10 changes: 10 additions & 0 deletions velox/vector/BaseVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,16 @@ class BaseVector {
/// hasn't been loaded yet.
virtual uint64_t estimateFlatSize() const;

/// Returns an estimate of the size of a single row in this vector.
/// NOTE: the RowVector, ArrayVector and MapVector overrides are placeholders
/// that currently return 0.
virtual uint64_t estimateRowSize() const;

/// Returns an estimate of the size of this vector as if it were compacted,
/// ignoring any over-allocation.
/// (1) For a dictionary vector, each referenced dictionary entry is counted
/// once rather than once per row that references it.
/// (2) For a flat vector, returns its retained size.
/// (3) For a lazy vector that hasn't been loaded, returns zero.
virtual uint64_t estimateCompactSize() const;

/// To safely reuse a vector one needs to (1) ensure that the vector as well
/// as all its buffers and child vectors are singly-referenced and mutable
/// (for buffers); (2) clear append-only string buffers and child vectors
Expand Down
36 changes: 36 additions & 0 deletions velox/vector/ComplexVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,21 @@ uint64_t RowVector::estimateFlatSize() const {
return total;
}

// Adds this vector's own BaseVector::retainedSize() to the compact-size
// estimate of every child that is set.
uint64_t RowVector::estimateCompactSize() const {
  uint64_t estimate = BaseVector::retainedSize();
  for (const auto& column : children_) {
    if (!column) {
      // Unset children contribute nothing.
      continue;
    }
    estimate += column->estimateCompactSize();
  }
  return estimate;
}

// Placeholder: per-row size estimation is not implemented for RowVector yet,
// so this always reports 0.
uint64_t RowVector::estimateRowSize() const {
  // TODO: Implement per-row size estimation.
  constexpr uint64_t kNotEstimated = 0;
  return kNotEstimated;
}

void RowVector::prepareForReuse() {
BaseVector::prepareForReuse();
for (auto& child : children_) {
Expand Down Expand Up @@ -979,6 +994,16 @@ uint64_t ArrayVector::estimateFlatSize() const {
sizes_->capacity() + elements_->estimateFlatSize();
}

// Adds up, in order: this vector's own BaseVector::retainedSize(), the
// capacity() of offsets_ and sizes_, and the compact-size estimate of the
// elements vector.
uint64_t ArrayVector::estimateCompactSize() const {
  uint64_t estimate = BaseVector::retainedSize();
  estimate += offsets_->capacity();
  estimate += sizes_->capacity();
  estimate += elements_->estimateCompactSize();
  return estimate;
}

// Placeholder: per-row size estimation is not implemented for ArrayVector
// yet, so this always reports 0.
uint64_t ArrayVector::estimateRowSize() const {
  // TODO: Implement per-row size estimation.
  constexpr uint64_t kNotEstimated = 0;
  return kNotEstimated;
}

namespace {
void zeroOutBuffer(BufferPtr buffer) {
memset(buffer->asMutable<char>(), 0, buffer->size());
Expand Down Expand Up @@ -1283,6 +1308,17 @@ uint64_t MapVector::estimateFlatSize() const {
values_->estimateFlatSize();
}

// Adds up, in order: this vector's own BaseVector::retainedSize(), the
// capacity() of offsets_ and sizes_, and the compact-size estimates of the
// keys and values vectors.
uint64_t MapVector::estimateCompactSize() const {
  uint64_t estimate = BaseVector::retainedSize();
  estimate += offsets_->capacity();
  estimate += sizes_->capacity();
  estimate += keys_->estimateCompactSize();
  estimate += values_->estimateCompactSize();
  return estimate;
}

// Placeholder: per-row size estimation is not implemented for MapVector yet,
// so this always reports 0.
uint64_t MapVector::estimateRowSize() const {
  // TODO: Implement per-row size estimation.
  constexpr uint64_t kNotEstimated = 0;
  return kNotEstimated;
}

void MapVector::prepareForReuse() {
BaseVector::prepareForReuse();

Expand Down
10 changes: 10 additions & 0 deletions velox/vector/ComplexVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ class RowVector : public BaseVector {

uint64_t estimateFlatSize() const override;

/// Returns BaseVector::retainedSize() plus the estimateCompactSize() of every
/// non-null child.
uint64_t estimateCompactSize() const override;
/// Placeholder: the current implementation always returns 0.
uint64_t estimateRowSize() const override;

using BaseVector::toString;

std::string toString(vector_size_t index) const override;
Expand Down Expand Up @@ -457,6 +460,10 @@ class ArrayVector : public ArrayVectorBase {

uint64_t estimateFlatSize() const override;

/// Returns BaseVector::retainedSize() plus the capacity() of offsets_ and
/// sizes_ plus the estimateCompactSize() of the elements vector.
uint64_t estimateCompactSize() const override;

/// Placeholder: the current implementation always returns 0.
uint64_t estimateRowSize() const override;

using BaseVector::toString;

std::string toString(vector_size_t index) const override;
Expand Down Expand Up @@ -586,6 +593,9 @@ class MapVector : public ArrayVectorBase {

uint64_t estimateFlatSize() const override;

/// Returns BaseVector::retainedSize() plus the capacity() of offsets_ and
/// sizes_ plus the estimateCompactSize() of the keys and values vectors.
uint64_t estimateCompactSize() const override;
/// Placeholder: the current implementation always returns 0.
uint64_t estimateRowSize() const override;

using BaseVector::toString;

std::string toString(vector_size_t index) const override;
Expand Down
8 changes: 8 additions & 0 deletions velox/vector/ConstantVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,14 @@ class ConstantVector final : public SimpleVector<T> {
return sizeof(T);
}

/// Delegates to the wrapped value vector when one is set; otherwise reports
/// this vector's own retained size.
uint64_t estimateRowSize() const override {
  if (valueVector_) {
    return valueVector_->estimateRowSize();
  }
  return retainedSize();
}

/// A constant vector holds a single distinct value, so its compact size is
/// the size of exactly one row.
uint64_t estimateCompactSize() const override {
  const uint64_t singleRow = estimateRowSize();
  return singleRow;
}

BaseVector* loadedVector() override {
if (!valueVector_) {
return this;
Expand Down
15 changes: 15 additions & 0 deletions velox/vector/DictionaryVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,21 @@ class DictionaryVector : public SimpleVector<T> {
indices_->capacity();
}

/// The per-row estimate of a dictionary vector is that of its underlying
/// dictionary values vector.
uint64_t estimateRowSize() const override {
  const uint64_t valueRowSize = dictionaryValues_->estimateRowSize();
  return valueRowSize;
}

/// Estimates the size of this vector as if it were compacted: the number of
/// distinct dictionary indices referenced by non-null rows multiplied by the
/// per-row estimate of the dictionary values.
///
/// Returns 0 when there is no indices buffer.
uint64_t estimateCompactSize() const override {
  if (indices() == nullptr) {
    return 0;
  }
  const int32_t* rawIndices = indices_->as<int32_t>();
  std::unordered_set<int32_t> distinctIndices;
  for (int32_t row = 0; row < BaseVector::length_; ++row) {
    // Null rows do not reference a dictionary entry. Skip them individually
    // rather than ending the scan at the first null, which would ignore every
    // index that follows it.
    if (BaseVector::isNullAt(row)) {
      continue;
    }
    distinctIndices.insert(rawIndices[row]);
  }
  return distinctIndices.size() * estimateRowSize();
}

/// Forwards the query to the underlying dictionary values vector.
bool isScalar() const override {
  const bool valuesAreScalar = dictionaryValues_->isScalar();
  return valuesAreScalar;
}
Expand Down
8 changes: 8 additions & 0 deletions velox/vector/FlatVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,14 @@ class FlatVector final : public SimpleVector<T> {
return size;
}

/// Returns the average per-row size: retained size divided by the number of
/// rows. An empty vector reports 0.
uint64_t estimateRowSize() const override {
  // Guard the division: an empty vector has length_ == 0 and integer division
  // by zero is undefined behaviour.
  if (BaseVector::length_ == 0) {
    return 0;
  }
  return retainedSize() / BaseVector::length_;
}

uint64_t estimateCompactSize() const override {
return retainedSize();
}

/**
* Used for vectors of type VARCHAR and VARBINARY to hold data referenced by
* StringView's. It is safe to share these among multiple vectors. These
Expand Down
8 changes: 8 additions & 0 deletions velox/vector/SequenceVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,14 @@ class SequenceVector : public SimpleVector<T> {
return sequenceValues_->retainedSize() + sequenceLengths_->capacity();
}

/// The per-row estimate of a sequence vector is that of its underlying
/// sequence values vector.
uint64_t estimateRowSize() const override {
  const uint64_t valueRowSize = sequenceValues_->estimateRowSize();
  return valueRowSize;
}

uint64_t estimateCompactSize() const override {
return sequenceLengths_->size() * estimateRowSize();
}

/// Forwards the query to the underlying sequence values vector.
bool isScalar() const override {
  const bool valuesAreScalar = sequenceValues_->isScalar();
  return valuesAreScalar;
}
Expand Down
2 changes: 1 addition & 1 deletion velox/vector/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ add_executable(
SelectivityVectorTest.cpp
VariantToVectorTest.cpp
VectorCompareTest.cpp
VectorEstimateFlatSizeTest.cpp
VectorEstimateSizeTest.cpp
VectorMakerTest.cpp
VectorPoolTest.cpp
VectorPrepareForReuseTest.cpp
Expand Down
Loading

0 comments on commit 12e1fe8

Please sign in to comment.