Skip to content

Commit ad5d8d2

Browse files
committed
Restore the bitmap_op fast path
1 parent 6cb3985 commit ad5d8d2

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed

src/types/redis_bitmap.cc

+54
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,61 @@ rocksdb::Status Bitmap::BitOp(BitOpFlags op_flag, const std::string &op_name, co
549549
} else {
550550
memset(frag_res.get(), 0, frag_maxlen);
551551
}
552+
/* Fast path: as far as we have data for all the input bitmaps we
553+
* can take a fast path that performs much better than the
554+
* vanilla algorithm.
555+
* We hope the compiler will generate a better code for memcpy
556+
* rather than keep this fast path only in ARM machine.
557+
*/
558+
if (frag_minlen >= sizeof(uint64_t) * 4 && frag_numkeys <= 16) {
559+
uint8_t *lres = frag_res.get();
560+
const uint8_t *lp[16];
561+
for (uint64_t i = 0; i < frag_numkeys; i++) {
562+
lp[i] = reinterpret_cast<const uint8_t *>(fragments[i].data());
563+
}
564+
memcpy(frag_res.get(), fragments[0].data(), frag_minlen);
565+
auto apply_fast_path_op = [&](auto op) {
566+
// Note: kBitOpNot cannot use this op, it only applying
567+
// to kBitOpAnd, kBitOpOr, kBitOpXor.
568+
DCHECK(op_flag != kBitOpNot);
569+
while (frag_minlen >= sizeof(uint64_t) * 4) {
570+
uint64_t lres_u64[4];
571+
uint64_t lp_u64[4];
572+
memcpy(lres_u64, lres, sizeof(lres_u64));
573+
memcpy(lp_u64, lp[0], sizeof(lp_u64));
574+
for (uint64_t i = 1; i < frag_numkeys; i++) {
575+
op(lres, lp[i]);
576+
op(lres + 8, lp[i] + 8);
577+
op(lres + 8 * 2, lp[i] + 8 * 2);
578+
op(lres + 8 * 3, lp[i] + 8 * 3);
579+
lp[i] += 4;
580+
}
581+
// memcpy back to lres
582+
memcpy(lres, lres_u64, sizeof(lres_u64));
583+
lres += 4;
584+
j += sizeof(uint64_t) * 4;
585+
frag_minlen -= sizeof(uint64_t) * 4;
586+
}
587+
};
552588

589+
if (op_flag == kBitOpAnd) {
590+
apply_fast_path_op([](uint64_t &a, const uint64_t &b) { a &= b; });
591+
} else if (op_flag == kBitOpOr) {
592+
apply_fast_path_op([](uint64_t &a, const uint64_t &b) { a |= b; });
593+
} else if (op_flag == kBitOpXor) {
594+
apply_fast_path_op([](uint64_t &a, const uint64_t &b) { a ^= b; });
595+
} else if (op_flag == kBitOpNot) {
596+
while (frag_minlen >= sizeof(uint64_t) * 4) {
597+
lres[0] = ~lres[0];
598+
lres[1] = ~lres[1];
599+
lres[2] = ~lres[2];
600+
lres[3] = ~lres[3];
601+
lres += 4;
602+
j += sizeof(uint64_t) * 4;
603+
frag_minlen -= sizeof(uint64_t) * 4;
604+
}
605+
}
606+
}
553607
uint8_t output = 0, byte = 0;
554608
for (; j < frag_maxlen; j++) {
555609
output = (fragments[0].size() <= j) ? 0 : fragments[0][j];

0 commit comments

Comments
 (0)