Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[opt](arm) Optimize the BlockBloomFilter::bucket_find on ARM platform… #43508

Open
wants to merge 1 commit into
base: branch-2.1
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion be/src/exprs/block_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,39 @@ class BlockBloomFilter {
return false;
}

#ifdef __ARM_NEON
void make_find_mask(uint32_t key, uint32x4_t* masks) const noexcept {
uint32x4_t hash_data_1 = vdupq_n_u32(key);
uint32x4_t hash_data_2 = vdupq_n_u32(key);

uint32x4_t rehash_1 = vld1q_u32(&kRehash[0]);
uint32x4_t rehash_2 = vld1q_u32(&kRehash[4]);

// masks[i] = key * kRehash[i];
hash_data_1 = vmulq_u32(rehash_1, hash_data_1);
hash_data_2 = vmulq_u32(rehash_2, hash_data_2);
// masks[i] = masks[i] >> shift_num;
hash_data_1 = vshrq_n_u32(hash_data_1, shift_num);
hash_data_2 = vshrq_n_u32(hash_data_2, shift_num);

const uint32x4_t ones = vdupq_n_u32(1);

// masks[i] = 0x1 << masks[i];
masks[0] = vshlq_u32(ones, reinterpret_cast<int32x4_t>(hash_data_1));
masks[1] = vshlq_u32(ones, reinterpret_cast<int32x4_t>(hash_data_2));
}
#else
// Scalar fallback: builds the probe masks for `key`.
// For every bucket word i in [0, kBucketWords):
//     masks[i] = 1 << ((key * kRehash[i]) >> shift_num)
// `masks` must point to at least kBucketWords uint32_t elements.
void make_find_mask(uint32_t key, uint32_t* masks) const noexcept {
    for (int i = 0; i < kBucketWords; ++i) {
        // Truncate the product to 32 bits before shifting, exactly as storing it
        // into masks[i] did previously.
        const uint32_t product = key * kRehash[i];
        const uint32_t bit_index = product >> shift_num;
        // Use an unsigned literal: the index can reach 31, and shifting a signed
        // 1 into the sign bit is not something to rely on.
        masks[i] = 1U << bit_index;
    }
}
#endif

// Computes the logical OR of this filter with 'other' and stores the result in this
// filter.
// Notes:
Expand Down Expand Up @@ -163,7 +196,8 @@ class BlockBloomFilter {
// log2(number of bits in a BucketWord)
static constexpr int kLogBucketWordBits = 5;
// Mask with the low kLogBucketWordBits bits set: (1 << 5) - 1 = 31.
static constexpr BucketWord kBucketWordMask = (1 << kLogBucketWordBits) - 1;

// Right-shift amount applied to a 32-bit rehash product: (1 << 5) - 5 = 27.
// Shifting a 32-bit value right by 27 keeps only its top kLogBucketWordBits bits,
// so the result is a bit index in [0, 32). Note that this selects the HIGH bits;
// it is not the same value as (x mod 32), which would keep the low bits.
static constexpr auto shift_num = ((1 << kLogBucketWordBits) - kLogBucketWordBits);
// log2(number of bytes in a bucket)
static constexpr int kLogBucketByteSize = 5;
// Bucket size in bytes.
Expand Down
29 changes: 26 additions & 3 deletions be/src/exprs/block_bloom_filter_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,37 @@ void BlockBloomFilter::bucket_insert(const uint32_t bucket_idx, const uint32_t h
}

// Returns true when every probe bit derived from `hash` is set in bucket
// `bucket_idx` of the directory, and false as soon as one of them is missing.
// The probe bits come from make_find_mask(): one single-bit mask per bucket word.
bool BlockBloomFilter::bucket_find(const uint32_t bucket_idx, const uint32_t hash) const noexcept {
#if defined(__ARM_NEON)
    uint32x4_t masks[2];

    // Load the eight bucket words as two 4-lane vectors.
    const uint32x4_t directory_lo = vld1q_u32(&_directory[bucket_idx][0]);
    const uint32x4_t directory_hi = vld1q_u32(&_directory[bucket_idx][4]);

    make_find_mask(hash, masks);

    // A hit requires every bit selected by masks[i] to be set in the matching
    // directory word, i.e. (mask AND NOT directory) must be zero in every lane.
    // vbicq_u32(a, b) computes (a AND NOT b).
    const uint32x4_t missing_lo = vbicq_u32(masks[0], directory_lo);
    const uint32x4_t missing_hi = vbicq_u32(masks[1], directory_hi);

    // OR-reduce all eight lanes down to a single word; any non-zero bit is a miss.
    const uint32x4_t missing = vorrq_u32(missing_lo, missing_hi);
    const uint32x2_t folded = vorr_u32(vget_low_u32(missing), vget_high_u32(missing));
    const uint32_t any_missing = vget_lane_u32(folded, 0) | vget_lane_u32(folded, 1);
    return any_missing == 0;
#else
    uint32_t masks[kBucketWords];
    make_find_mask(hash, masks);
    for (int i = 0; i < kBucketWords; ++i) {
        // masks[i] has exactly one bit set; the word must contain that bit.
        if ((DCHECK_NOTNULL(_directory)[bucket_idx][i] & masks[i]) == 0) {
            return false;
        }
    }
    return true;
#endif
}

void BlockBloomFilter::insert_no_avx2(const uint32_t hash) noexcept {
Expand Down
Loading