diff --git a/Benchmark/main.cpp b/Benchmark/main.cpp index 44301045..fe8f6ba8 100644 --- a/Benchmark/main.cpp +++ b/Benchmark/main.cpp @@ -1,829 +1,442 @@ -#include -#include #include -#include -#include -#include -#include -#include "BnchSwt/BenchmarkSuite.hpp" -#include -#include "RandomGenerators.hpp" -#include -#include -#include -#include - -alignas(2) JSONIFIER_INLINE_VARIABLE char charTable1[]{ 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 0x38u, 0x39u }; - -JSONIFIER_INLINE_VARIABLE uint16_t charTable02[]{ 0x3030u, 0x3130u, 0x3230u, 0x3330u, 0x3430u, 0x3530u, 0x3630u, 0x3730u, 0x3830u, 0x3930u, 0x3031u, 0x3131u, 0x3231u, 0x3331u, - 0x3431u, 0x3531u, 0x3631u, 0x3731u, 0x3831u, 0x3931u, 0x3032u, 0x3132u, 0x3232u, 0x3332u, 0x3432u, 0x3532u, 0x3632u, 0x3732u, 0x3832u, 0x3932u, 0x3033u, 0x3133u, 0x3233u, - 0x3333u, 0x3433u, 0x3533u, 0x3633u, 0x3733u, 0x3833u, 0x3933u, 0x3034u, 0x3134u, 0x3234u, 0x3334u, 0x3434u, 0x3534u, 0x3634u, 0x3734u, 0x3834u, 0x3934u, 0x3035u, 0x3135u, - 0x3235u, 0x3335u, 0x3435u, 0x3535u, 0x3635u, 0x3735u, 0x3835u, 0x3935u, 0x3036u, 0x3136u, 0x3236u, 0x3336u, 0x3436u, 0x3536u, 0x3636u, 0x3736u, 0x3836u, 0x3936u, 0x3037u, - 0x3137u, 0x3237u, 0x3337u, 0x3437u, 0x3537u, 0x3637u, 0x3737u, 0x3837u, 0x3937u, 0x3038u, 0x3138u, 0x3238u, 0x3338u, 0x3438u, 0x3538u, 0x3638u, 0x3738u, 0x3838u, 0x3938u, - 0x3039u, 0x3139u, 0x3239u, 0x3339u, 0x3439u, 0x3539u, 0x3639u, 0x3739u, 0x3839u, 0x3939u }; - -constexpr const uint64_t mask24 = (1ull << 24) - 1ull; -constexpr const uint64_t mask32 = (1ull << 32) - 1ull; -constexpr const uint64_t mask57 = (1ull << 57) - 1ull; -constexpr const uint64_t mult1_3 = 10ull * (1 << 24) / 1000 + 1; -constexpr const uint64_t mult5_6 = 10ull * (1ull << 32ull) / 100000 + 1; -constexpr const uint64_t mult7_8 = 10ull * (1ull << 48ull) / 10000000 + 1; -constexpr const uint64_t mult9_10 = (1ull << 48ull) / 1000000 + 1; - -struct int_serializing_package_2 { - mutable uint64_t value01; - mutable uint64_t value02; -}; - -template JSONIFIER_INLINE char* lengthNew1(char* buf, value_type value) { - buf[0] = charTable1[value]; - return buf + 1ull; -} - -template JSONIFIER_INLINE char* lengthNew2(char* buf, value_type value) { - std::memcpy(buf, charTable02 + value, 2); - return buf + 2; -} - -template JSONIFIER_INLINE char* lengthNew3(char* buf, value_type value) { - constexpr int_serializing_package_2 intPackage{}; - intPackage.value01 = mult1_3 * value; - buf[0] = charTable1[intPackage.value01 >> 24]; - intPackage.value02 = (intPackage.value01 & mask24) * 100ull; - std::memcpy(buf + 1, charTable02 + (intPackage.value02 >> 24), 2); - return buf + 3; -} - -template JSONIFIER_INLINE char* lengthNew4(char* buf, value_type value) { - constexpr int_serializing_package_2 intPackage{}; - intPackage.value01 = mult1_3 * value; - std::memcpy(buf, charTable02 + (intPackage.value01 >> 24), 2); - intPackage.value02 = (intPackage.value01 & mask24) * 100ull; - std::memcpy(buf + 2, charTable02 + (intPackage.value02 >> 24), 2); - return buf + 4; -} - -template JSONIFIER_INLINE char* lengthNew5(char* buf, value_type value) { - constexpr int_serializing_package_2 intPackage{}; - intPackage.value01 = mult5_6 * value; - buf[0] = charTable1[intPackage.value01 >> 32]; - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 1, charTable02 + (intPackage.value02 >> 32), 2); - intPackage.value01 = (intPackage.value02 & mask32) * 100ull; - std::memcpy(buf + 3, charTable02 + (intPackage.value01 >> 32), 2); - return buf + 5; -} - -template JSONIFIER_INLINE char* lengthNew6(char* buf, value_type value) { - constexpr int_serializing_package_2 intPackage{}; - intPackage.value01 = mult5_6 * value; - std::memcpy(buf, charTable02 + (intPackage.value01 >> 32), 2); - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 2, charTable02 + (intPackage.value02 >> 32), 2); - intPackage.value01 = (intPackage.value02 & mask32) * 100ull; - std::memcpy(buf + 4, charTable02 + (intPackage.value01 >> 32), 2); - return buf + 6; -} - -template JSONIFIER_INLINE char* lengthNew7(char* buf, value_type value) { - constexpr int_serializing_package_2 intPackage{}; - intPackage.value01 = mult7_8 * value >> 16; - buf[0] = charTable1[intPackage.value01 >> 32]; - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 1, charTable02 + (intPackage.value02 >> 32), 2); - intPackage.value01 = (intPackage.value02 & mask32) * 100ull; - std::memcpy(buf + 3, charTable02 + (intPackage.value01 >> 32), 2); - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 5, charTable02 + (intPackage.value02 >> 32), 2); - return buf + 7; -} - -template JSONIFIER_INLINE char* lengthNew8(char* buf, value_type value) { - constexpr int_serializing_package_2 intPackage{}; - intPackage.value01 = mult7_8 * value >> 16; - std::memcpy(buf, charTable02 + (intPackage.value01 >> 32), 2); - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 2, charTable02 + (intPackage.value02 >> 32), 2); - intPackage.value01 = (intPackage.value02 & mask32) * 100ull; - std::memcpy(buf + 4, charTable02 + (intPackage.value01 >> 32), 2); - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 6, charTable02 + (intPackage.value02 >> 32), 2); - return buf + 8; -} - -template JSONIFIER_INLINE char* lengthNew9(char* buf, value_type value) { - constexpr int_serializing_package_2 intPackage{}; - intPackage.value01 = value * 1441151880ull >> 57; - intPackage.value02 = value - intPackage.value01 * 100000000ull; - buf[0] = charTable1[intPackage.value01]; - intPackage.value01 = (mult9_10 * intPackage.value02 >> 16) + 1ull; - std::memcpy(buf + 1, charTable02 + (intPackage.value01 >> 32), 2); - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 3, charTable02 + (intPackage.value02 >> 32), 2); - intPackage.value01 = (intPackage.value02 & mask32) * 100ull; - std::memcpy(buf + 5, charTable02 + (intPackage.value01 >> 32), 2); - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 7, charTable02 + (intPackage.value02 >> 32), 2); - return buf + 9; -} - -template JSONIFIER_INLINE char* lengthNew10(char* buf, value_type value) { - constexpr int_serializing_package_2 intPackage{}; - intPackage.value01 = value * 1441151880ull >> 57; - intPackage.value02 = value - intPackage.value01 * 100000000ull; - std::memcpy(buf, charTable02 + intPackage.value01, 2); - intPackage.value01 = (mult9_10 * intPackage.value02 >> 16) + 1; - std::memcpy(buf + 2, charTable02 + (intPackage.value01 >> 32), 2); - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 4, charTable02 + (intPackage.value02 >> 32), 2); - intPackage.value01 = (intPackage.value02 & mask32) * 100ull; - std::memcpy(buf + 6, charTable02 + (intPackage.value01 >> 32), 2); - intPackage.value02 = (intPackage.value01 & mask32) * 100ull; - std::memcpy(buf + 8, charTable02 + (intPackage.value02 >> 32), 2); - return buf + 10ull; -} - -template JSONIFIER_INLINE char* lengthNew11(char* buf, value_type value) { - const uint64_t z = value / 10ull; - const uint64_t u = value - z * 10ull; - buf = lengthNew10(buf, z); - return lengthNew1(buf, u); -} - -template JSONIFIER_INLINE char* lengthNew12(char* buf, value_type value) { - const uint64_t z = value / 100ull; - const uint64_t u = value - z * 100ull; - buf = lengthNew10(buf, z); - return lengthNew2(buf, u); -} - -template JSONIFIER_INLINE char* lengthNew13(char* buf, value_type value) { - const uint64_t z = value / 1000ull; - const uint64_t u = value - z * 1000ull; - buf = lengthNew10(buf, z); - return lengthNew3(buf, u); -} - -template JSONIFIER_INLINE char* lengthNew14(char* buf, value_type value) { - const uint64_t z = value / 10000ull; - const uint64_t u = value - z * 10000ull; - buf = lengthNew10(buf, z); - return lengthNew4(buf, u); -} +#include +#include -template JSONIFIER_INLINE char* lengthNew15(char* buf, value_type value) { - const uint64_t z = value / 100000ull; - const uint64_t u = value - z * 100000ull; - buf = lengthNew10(buf, z); - return lengthNew5(buf, u); -} +uint64_t generateRandomIntegerByLength(uint32_t digitLength) { + if (digitLength == 0) { + throw std::invalid_argument("Digit length must be greater than 0."); + } -template JSONIFIER_INLINE char* lengthNew16(char* buf, value_type value) { - const uint64_t z = value / 1000000ull; - const uint64_t u = value - z * 1000000ull; - buf = lengthNew10(buf, z); - return lengthNew6(buf, u); -} + if (digitLength > 20) { + throw std::invalid_argument("Digit length exceeds the limit for uint64_t (maximum 20 digits)."); + } -template JSONIFIER_INLINE char* lengthNew17(char* buf, value_type value) { - uint64_t tmp = value / 10000000000ull; - uint64_t f0 = mult7_8 * tmp >> 16; - buf[0] = charTable1[f0 >> 32]; - uint64_t f2 = (f0 & mask32) * 100ull; - std::memcpy(buf + 1, charTable02 + (f2 >> 32), 2); - uint64_t f4 = (f2 & mask32) * 100ull; - std::memcpy(buf + 3, charTable02 + (f4 >> 32), 2); - uint64_t f6 = (f4 & mask32) * 100ull; - std::memcpy(buf + 5, charTable02 + (f6 >> 32), 2); - tmp = value - tmp * 10000000000ull; - const uint64_t u = (tmp) * 1441151880ull >> 57; - const uint64_t z = ( tmp )-u * 100000000ull; - std::memcpy(buf + 7, charTable02 + u, 2); - f0 = (mult9_10 * z >> 16) + 1; - std::memcpy(buf + 9, charTable02 + (f0 >> 32), 2); - f2 = (f0 & mask32) * 100ull; - std::memcpy(buf + 11, charTable02 + (f2 >> 32), 2); - f4 = (f2 & mask32) * 100ull; - std::memcpy(buf + 13, charTable02 + (f4 >> 32), 2); - f6 = (f4 & mask32) * 100ull; - std::memcpy(buf + 15, charTable02 + (f6 >> 32), 2); - return buf + 17; -} + // Directly compute the min and max values using powers of 10 + uint64_t minValue = static_cast(std::pow(10, digitLength - 1)); + uint64_t maxValue = static_cast(std::pow(10, digitLength) - 1); -template JSONIFIER_INLINE char* lengthNew18(char* buf, value_type value) { - uint64_t tmp = value / 10000000000ull; - uint64_t f0 = mult7_8 * tmp >> 16; - std::memcpy(buf, charTable02 + (f0 >> 32), 2); - uint64_t f2 = (f0 & mask32) * 100ull; - std::memcpy(buf + 2, charTable02 + (f2 >> 32), 2); - uint64_t f4 = (f2 & mask32) * 100ull; - std::memcpy(buf + 4, charTable02 + (f4 >> 32), 2); - uint64_t f6 = (f4 & mask32) * 100ull; - std::memcpy(buf + 6, charTable02 + (f6 >> 32), 2); - tmp = value - tmp * 10000000000ull; - const uint64_t u = (tmp) * 1441151880ull >> 57; - const uint64_t z = ( tmp )-u * 100000000ull; - std::memcpy(buf + 8, charTable02 + u, 2); - f0 = (mult9_10 * z >> 16) + 1; - std::memcpy(buf + 10, charTable02 + (f0 >> 32), 2); - f2 = (f0 & mask32) * 100ull; - std::memcpy(buf + 12, charTable02 + (f2 >> 32), 2); - f4 = (f2 & mask32) * 100ull; - std::memcpy(buf + 14, charTable02 + (f4 >> 32), 2); - f6 = (f4 & mask32) * 100ull; - std::memcpy(buf + 16, charTable02 + (f6 >> 32), 2); - return buf + 18; -} + std::random_device rd; + std::mt19937_64 gen(rd()); + std::uniform_int_distribution dist(minValue, maxValue); -template JSONIFIER_INLINE char* lengthNew19(char* buf, value_type value) { - uint64_t u = value / 100000000ull; - const uint64_t z = value - u * 100000000ull; - - const uint64_t uOld{ u }; - u /= 100000000ull; - const uint64_t y = uOld - u * 100000000ull; - - uint64_t f0 = mult1_3 * u; - buf[0] = charTable1[f0 >> 24]; - uint64_t f2 = (f0 & mask24) * 100ull; - std::memcpy(buf + 1, charTable02 + (f2 >> 24), 2); - f0 = (mult9_10 * y >> 16) + 1ull; - std::memcpy(buf + 3, charTable02 + (f0 >> 32), 2); - f2 = (f0 & mask32) * 100ull; - std::memcpy(buf + 5, charTable02 + (f2 >> 32), 2); - uint64_t f4 = (f2 & mask32) * 100ull; - std::memcpy(buf + 7, charTable02 + (f4 >> 32), 2); - uint64_t f6 = (f4 & mask32) * 100ull; - std::memcpy(buf + 9, charTable02 + (f6 >> 32), 2); - f0 = (mult9_10 * z >> 16) + 1ull; - std::memcpy(buf + 11, charTable02 + (f0 >> 32), 2); - f2 = (f0 & mask32) * 100ull; - std::memcpy(buf + 13, charTable02 + (f2 >> 32), 2); - f4 = (f2 & mask32) * 100ull; - std::memcpy(buf + 15, charTable02 + (f4 >> 32), 2); - f6 = (f4 & mask32) * 100ull; - std::memcpy(buf + 17, charTable02 + (f6 >> 32), 2); - return buf + 19; + return dist(gen); } -template JSONIFIER_INLINE char* lengthNew20(char* buf, value_type value) { - uint64_t u = value / 100000000ull; - const uint64_t z = value - u * 100000000ull; - - const uint64_t uOld{ u }; - u /= 100000000ull; - const uint64_t y = uOld - u * 100000000ull; - - uint64_t f0 = mult1_3 * u; - std::memcpy(buf, charTable02 + (f0 >> 24), 2); - uint64_t f2 = (f0 & mask24) * 100ull; - std::memcpy(buf + 2, charTable02 + (f2 >> 24), 2); - f0 = (mult9_10 * y >> 16) + 1ull; - std::memcpy(buf + 4, charTable02 + (f0 >> 32), 2); - f2 = (f0 & mask32) * 100ull; - std::memcpy(buf + 6, charTable02 + (f2 >> 32), 2); - uint64_t f4 = (f2 & mask32) * 100ull; - std::memcpy(buf + 8, charTable02 + (f4 >> 32), 2); - uint64_t f6 = (f4 & mask32) * 100ull; - std::memcpy(buf + 10, charTable02 + (f6 >> 32), 2); - f0 = (mult9_10 * z >> 16) + 1ull; - std::memcpy(buf + 12, charTable02 + (f0 >> 32), 2); - f2 = (f0 & mask32) * 100ull; - std::memcpy(buf + 14, charTable02 + (f2 >> 32), 2); - f4 = (f2 & mask32) * 100ull; - std::memcpy(buf + 16, charTable02 + (f4 >> 32), 2); - f6 = (f4 & mask32) * 100ull; - std::memcpy(buf + 18, charTable02 + (f6 >> 32), 2); - return buf + 20ull; -} +template std::vector generateRandomIntegers(size_t count, size_t maxLength) { + std::random_device rd; + std::mt19937_64 gen(rd()); + std::uniform_int_distribution lengthGen(1, maxLength); + std::vector randomNumbers; -template - requires std::same_as, uint64_t> -JSONIFIER_INLINE char* to_text_from_integer(char* buf, value_type value) { - const uint64_t index{ jsonifier::internal::fastDigitCount(value) }; - switch (index) { - case 1: { - return lengthNew1(buf, value); - } - case 2: { - return lengthNew2(buf, value); - } - case 3: { - return lengthNew3(buf, value); - } - case 4: { - return lengthNew4(buf, value); - } - case 5: { - return lengthNew5(buf, value); - } - case 6: { - return lengthNew6(buf, value); - } - case 7: { - return lengthNew7(buf, value); - } - case 8: { - return lengthNew8(buf, value); - } - case 9: { - return lengthNew9(buf, value); - } - case 10: { - return lengthNew10(buf, value); - } - case 11: { - return lengthNew11(buf, value); - } - case 12: { - return lengthNew12(buf, value); - } - case 13: { - return lengthNew13(buf, value); - } - case 14: { - return lengthNew14(buf, value); - } - case 15: { - return lengthNew15(buf, value); - } - case 16: { - return lengthNew16(buf, value); - } - case 17: { - return lengthNew17(buf, value); - } - case 18: { - return lengthNew18(buf, value); - } - case 19: { - return lengthNew19(buf, value); - } - default: { - return lengthNew20(buf, value); + for (size_t i = 0; i < count; ++i) { + uint64_t newValue{ generateRandomIntegerByLength(lengthGen(gen)) }; + if (newValue >= std::numeric_limits::max()) { + newValue /= 10; } + randomNumbers.push_back(newValue); } -} -template - requires std::same_as, int64_t> -auto* to_text_from_integer(auto* buf, value_type x) noexcept { - *buf = '-'; - return to_text_from_integer(buf + (x < 0), uint64_t(x ^ (x >> 63)) - (x >> 63)); -} - -constexpr char char_table[200] = { '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', '1', - '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', '3', '0', '3', '1', - '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', - '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', - '6', '7', '6', '8', '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3', '8', - '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' }; - -template - requires(std::same_as, uint32_t>) -JSONIFIER_INLINE auto* to_chars_u64_len_8(auto* buf, value_type value) noexcept { - /* 8 digits: aabbccdd */ - const uint32_t aabb = uint32_t((uint64_t(value) * 109951163) >> 40); /* (value / 10000) */ - const uint32_t ccdd = value - aabb * 10000; /* (value % 10000) */ - const uint32_t aa = (aabb * 5243) >> 19; /* (aabb / 100) */ - const uint32_t cc = (ccdd * 5243) >> 19; /* (ccdd / 100) */ - const uint32_t bb = aabb - aa * 100; /* (aabb % 100) */ - const uint32_t dd = ccdd - cc * 100; /* (ccdd % 100) */ - std::memcpy(buf, char_table + aa * 2, 2); - std::memcpy(buf + 2, char_table + bb * 2, 2); - std::memcpy(buf + 4, char_table + cc * 2, 2); - std::memcpy(buf + 6, char_table + dd * 2, 2); - return buf + 8; + return randomNumbers; } -template - requires(std::same_as, uint32_t>) -JSONIFIER_INLINE auto* to_chars_u64_len_4(auto* buf, value_type value) noexcept { - /* 4 digits: aabb */ - const uint32_t aa = (value * 5243) >> 19; /* (value / 100) */ - const uint32_t bb = value - aa * 100; /* (value % 100) */ - std::memcpy(buf, char_table + aa * 2, 2); - std::memcpy(buf + 2, char_table + bb * 2, 2); - return buf + 4; -} +template struct int_tables { -template - requires(std::same_as, uint32_t>) -inline auto* to_chars_u64_len_1_8(auto* buf, value_type value) noexcept { - uint32_t aa, bb, cc, dd, aabb, bbcc, ccdd, lz; - - if (value < 100) { /* 1-2 digits: aa */ - lz = value < 10; - std::memcpy(buf, char_table + value * 2 + lz, 2); - buf -= lz; - return buf + 2; - } else if (value < 10000) { /* 3-4 digits: aabb */ - aa = (value * 5243) >> 19; /* (value / 100) */ - bb = value - aa * 100; /* (value % 100) */ - lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, char_table + bb * 2, 2); - return buf + 4; - } else if (value < 1000000) { /* 5-6 digits: aabbcc */ - aa = uint32_t((uint64_t(value) * 429497) >> 32); /* (value / 10000) */ - bbcc = value - aa * 10000; /* (value % 10000) */ - bb = (bbcc * 5243) >> 19; /* (bbcc / 100) */ - cc = bbcc - bb * 100; /* (bbcc % 100) */ - lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, char_table + bb * 2, 2); - std::memcpy(buf + 4, char_table + cc * 2, 2); - return buf + 6; - } else { /* 7-8 digits: aabbccdd */ - /* (value / 10000) */ - aabb = uint32_t((uint64_t(value) * 109951163) >> 40); - ccdd = value - aabb * 10000; /* (value % 10000) */ - aa = (aabb * 5243) >> 19; /* (aabb / 100) */ - cc = (ccdd * 5243) >> 19; /* (ccdd / 100) */ - bb = aabb - aa * 100; /* (aabb % 100) */ - dd = ccdd - cc * 100; /* (ccdd % 100) */ - lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, char_table + bb * 2, 2); - std::memcpy(buf + 4, char_table + cc * 2, 2); - std::memcpy(buf + 6, char_table + dd * 2, 2); - return buf + 8; - } -} + static constexpr uint8_t digitCounts[]{ 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, 10, + 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1 }; -template - requires(std::same_as, uint32_t>) -auto* to_chars_u64_len_5_8(auto* buf, value_type value) noexcept { - if (value < 1000000) { /* 5-6 digits: aabbcc */ - const uint32_t aa = uint32_t((uint64_t(value) * 429497) >> 32); /* (value / 10000) */ - const uint32_t bbcc = value - aa * 10000; /* (value % 10000) */ - const uint32_t bb = (bbcc * 5243) >> 19; /* (bbcc / 100) */ - const uint32_t cc = bbcc - bb * 100; /* (bbcc % 100) */ - const uint32_t lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, char_table + bb * 2, 2); - std::memcpy(buf + 4, char_table + cc * 2, 2); - return buf + 6; - } else { /* 7-8 digits: aabbccdd */ - /* (value / 10000) */ - const uint32_t aabb = uint32_t((uint64_t(value) * 109951163) >> 40); - const uint32_t ccdd = value - aabb * 10000; /* (value % 10000) */ - const uint32_t aa = (aabb * 5243) >> 19; /* (aabb / 100) */ - const uint32_t cc = (ccdd * 5243) >> 19; /* (ccdd / 100) */ - const uint32_t bb = aabb - aa * 100; /* (aabb % 100) */ - const uint32_t dd = ccdd - cc * 100; /* (ccdd % 100) */ - const uint32_t lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, char_table + bb * 2, 2); - std::memcpy(buf + 4, char_table + cc * 2, 2); - std::memcpy(buf + 6, char_table + dd * 2, 2); - return buf + 8; - } -} + static constexpr uint64_t digitCountThresholds[]{ 0ull, 9ull, 99ull, 999ull, 9999ull, 99999ull, 999999ull, 9999999ull, 99999999ull, 999999999ull, 9999999999ull, 99999999999ull, + 999999999999ull, 9999999999999ull, 99999999999999ull, 999999999999999ull, 9999999999999999ull, 99999999999999999ull, 999999999999999999ull, 9999999999999999999ull }; +}; -template - requires(std::same_as, uint64_t>) -auto* to_chars(auto* buf, value_type value) noexcept { - if (value < 100000000) { /* 1-8 digits */ - buf = to_chars_u64_len_1_8(buf, uint32_t(value)); - return buf; - } else if (value < 100000000ull * 100000000ull) { /* 9-16 digits */ - const uint64_t hgh = value / 100000000; - const auto low = uint32_t(value - hgh * 100000000); /* (value % 100000000) */ - buf = to_chars_u64_len_1_8(buf, uint32_t(hgh)); - buf = to_chars_u64_len_8(buf, low); - return buf; - } else { /* 17-20 digits */ - const uint64_t tmp = value / 100000000; - const auto low = uint32_t(value - tmp * 100000000); /* (value % 100000000) */ - const auto hgh = uint32_t(tmp / 10000); - const auto mid = uint32_t(tmp - hgh * 10000); /* (tmp % 10000) */ - buf = to_chars_u64_len_5_8(buf, hgh); - buf = to_chars_u64_len_4(buf, mid); - buf = to_chars_u64_len_8(buf, low); - return buf; +JSONIFIER_INLINE uint64_t fastDigitCount(const uint64_t inputValue) { + const uint64_t originalDigitCount{ int_tables::digitCounts[jsonifier::simd::lzcnt(inputValue)] }; + return originalDigitCount + static_cast(inputValue > int_tables::digitCountThresholds[originalDigitCount]); +} + +JSONIFIER_INLINE int int_log2(uint64_t x) { + return 63 - jsonifier::simd::lzcnt(x | 1); +} + +JSONIFIER_INLINE int digit_count(uint32_t x) { + static constexpr uint32_t table[] = { 9, 99, 999, 9999, 99999, 999999, 9999999, 99999999, 999999999 }; + int y = (9 * int_log2(x)) >> 5; + y += x > table[y]; + return y + 1; +} + +JSONIFIER_INLINE int digit_count(uint64_t x) { + static constexpr uint64_t table[] = { 9, 99, 999, 9999, 99999, 999999, 9999999, 99999999, 999999999, 9999999999, 99999999999, 999999999999, 9999999999999, 99999999999999, + 999999999999999ULL, 9999999999999999ULL, 99999999999999999ULL, 999999999999999999ULL, 9999999999999999999ULL }; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +JSONIFIER_INLINE int alternative_digit_count(uint32_t x) { + static constexpr uint64_t table[] = { 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, 21474826480, + 21474826480, 21474826480, 21474826480, 25769703776, 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, 34349738368, 34349738368, 34349738368, 34349738368, + 38554705664, 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, 42949672960, 42949672960 }; + return (x + table[int_log2(x)]) >> 32; +} + +JSONIFIER_INLINE int alternative_digit_count(uint64_t x) { + static constexpr uint64_t table[64][2] = { + { 0x01, 0xfffffffffffffff6ULL }, + { 0x01, 0xfffffffffffffff6ULL }, + { 0x01, 0xfffffffffffffff6ULL }, + { 0x01, 0xfffffffffffffff6ULL }, + { 0x02, 0xffffffffffffff9cULL }, + { 0x02, 0xffffffffffffff9cULL }, + { 0x02, 0xffffffffffffff9cULL }, + { 0x03, 0xfffffffffffffc18ULL }, + { 0x03, 0xfffffffffffffc18ULL }, + { 0x03, 0xfffffffffffffc18ULL }, + { 0x04, 0xffffffffffffd8f0ULL }, + { 0x04, 0xffffffffffffd8f0ULL }, + { 0x04, 0xffffffffffffd8f0ULL }, + { 0x04, 0xffffffffffffd8f0ULL }, + { 0x05, 0xfffffffffffe7960ULL }, + { 0x05, 0xfffffffffffe7960ULL }, + { 0x05, 0xfffffffffffe7960ULL }, + { 0x06, 0xfffffffffff0bdc0ULL }, + { 0x06, 0xfffffffffff0bdc0ULL }, + { 0x06, 0xfffffffffff0bdc0ULL }, + { 0x07, 0xffffffffff676980ULL }, + { 0x07, 0xffffffffff676980ULL }, + { 0x07, 0xffffffffff676980ULL }, + { 0x07, 0xffffffffff676980ULL }, + { 0x08, 0xfffffffffa0a1f00ULL }, + { 0x08, 0xfffffffffa0a1f00ULL }, + { 0x08, 0xfffffffffa0a1f00ULL }, + { 0x09, 0xffffffffc4653600ULL }, + { 0x09, 0xffffffffc4653600ULL }, + { 0x09, 0xffffffffc4653600ULL }, + { 0x0a, 0xfffffffdabf41c00ULL }, + { 0x0a, 0xfffffffdabf41c00ULL }, + { 0x0a, 0xfffffffdabf41c00ULL }, + { 0x0a, 0xfffffffdabf41c00ULL }, + { 0x0b, 0xffffffe8b7891800ULL }, + { 0x0b, 0xffffffe8b7891800ULL }, + { 0x0b, 0xffffffe8b7891800ULL }, + { 0x0c, 0xffffff172b5af000ULL }, + { 0x0c, 0xffffff172b5af000ULL }, + { 0x0c, 0xffffff172b5af000ULL }, + { 0x0d, 0xfffff6e7b18d6000ULL }, + { 0x0d, 0xfffff6e7b18d6000ULL }, + { 0x0d, 0xfffff6e7b18d6000ULL }, + { 0x0d, 0xfffff6e7b18d6000ULL }, + { 0x0e, 0xffffa50cef85c000ULL }, + { 0x0e, 0xffffa50cef85c000ULL }, + { 0x0e, 0xffffa50cef85c000ULL }, + { 0x0f, 0xfffc72815b398000ULL }, + { 0x0f, 0xfffc72815b398000ULL }, + { 0x0f, 0xfffc72815b398000ULL }, + { 0x10, 0xffdc790d903f0000ULL }, + { 0x10, 0xffdc790d903f0000ULL }, + { 0x10, 0xffdc790d903f0000ULL }, + { 0x10, 0xffdc790d903f0000ULL }, + { 0x11, 0xfe9cba87a2760000ULL }, + { 0x11, 0xfe9cba87a2760000ULL }, + { 0x11, 0xfe9cba87a2760000ULL }, + { 0x12, 0xf21f494c589c0000ULL }, + { 0x12, 0xf21f494c589c0000ULL }, + { 0x12, 0xf21f494c589c0000ULL }, + { 0x13, 0x7538dcfb76180000ULL }, + { 0x13, 0x7538dcfb76180000ULL }, + { 0x13, 0x7538dcfb76180000ULL }, + { 0x13, 0x7538dcfb76180000ULL }, + }; + int log = int_log2(x); + uint64_t low = table[log][1]; + uint64_t high = table[log][0]; + return (x + low < x) + high; +} + +JSONIFIER_INLINE int fast_digit_count(uint32_t x) { + // It's also possible to reuse the table from fast_digit_count_64, since the + // first 32 entries match, and the fact that elements are 64 instead of 32 bit + // wide doesn't seem to affect performance. + static constexpr uint32_t table[32] = { + 9ul,// 0 + 9ul,// 1 + 9ul,// 2 + 9ul,// 3 + 99ul,// 4 + 99ul,// 5 + 99ul,// 6 + 999ul,// 7 + 999ul,// 8 + 999ul,// 9 + 9999ul,// 10 + 9999ul,// 11 + 9999ul,// 12 + 9999ul,// 13 + 99999ul,// 14 + 99999ul,// 15 + 99999ul,// 16 + 999999ul,// 17 + 999999ul,// 18 + 999999ul,// 19 + 9999999ul,// 20 + 9999999ul,// 21 + 9999999ul,// 22 + 9999999ul,// 23 + 99999999ul,// 24 + 99999999ul,// 25 + 99999999ul,// 26 + 999999999ul,// 27 + 999999999ul,// 28 + 999999999ul,// 29 + 4294967295ul,// 30 + 4294967295ul,// 31 + }; + unsigned log = int_log2(x); + return ((77 * log) >> 8) + 1 + (x > table[log]); +} + +JSONIFIER_INLINE int fast_digit_count(uint64_t x) { + // table[i] is 1 less than the smallest power of 10 greater than 2 to the power of i. + // + // For example: + // + // 2^3 = 8 -> table[3] = 10 - 1 = 9 + // 2^4 = 16 -> table[4] = 100 - 1 = 99 + // + static constexpr uint64_t table[64] = { + 9ull,// 0 + 9ull,// 1 + 9ull,// 2 + 9ull,// 3 + 99ull,// 4 + 99ull,// 5 + 99ull,// 6 + 999ull,// 7 + 999ull,// 8 + 999ull,// 9 + 9999ull,// 10 + 9999ull,// 11 + 9999ull,// 12 + 9999ull,// 13 + 99999ull,// 14 + 99999ull,// 15 + 99999ull,// 16 + 999999ull,// 17 + 999999ull,// 18 + 999999ull,// 19 + 9999999ull,// 20 + 9999999ull,// 21 + 9999999ull,// 22 + 9999999ull,// 23 + 99999999ull,// 24 + 99999999ull,// 25 + 99999999ull,// 26 + 999999999ull,// 27 + 999999999ull,// 28 + 999999999ull,// 29 + 9999999999ull,// 30 + 9999999999ull,// 31 + 9999999999ull,// 32 + 9999999999ull,// 33 + 99999999999ull,// 34 + 99999999999ull,// 35 + 99999999999ull,// 36 + 999999999999ull,// 37 + 999999999999ull,// 38 + 999999999999ull,// 39 + 9999999999999ull,// 40 + 9999999999999ull,// 41 + 9999999999999ull,// 42 + 9999999999999ull,// 43 + 99999999999999ull,// 44 + 99999999999999ull,// 45 + 99999999999999ull,// 46 + 999999999999999ull,// 47 + 999999999999999ull,// 48 + 999999999999999ull,// 49 + 9999999999999999ull,// 50 + 9999999999999999ull,// 51 + 9999999999999999ull,// 52 + 9999999999999999ull,// 53 + 99999999999999999ull,// 54 + 99999999999999999ull,// 55 + 99999999999999999ull,// 56 + 999999999999999999ull,// 57 + 999999999999999999ull,// 58 + 999999999999999999ull,// 59 + 9999999999999999999ull,// 60 + 9999999999999999999ull,// 61 + 9999999999999999999ull,// 62 + 9999999999999999999ull,// 63 + }; + // 77/256 = 0.30078125 is an approximation of log(2)/log(10) = 0.30102999566398114 + unsigned log = int_log2(x); + return ((77 * log) >> 8) + 1 + (x > table[log]); +} + +template JSONIFIER_INLINE void testFunction32() { + auto randomIntegers = generateRandomIntegers(count, sizeof(uint32_t) == 4 ? 10 : 20); + std::vector counts{}; + std::vector results{}; + counts.resize(count); + results.resize(count); + for (size_t x = 0; x < count; ++x) { + counts[x] = digit_count(randomIntegers[x]); } -} - -template - requires std::same_as, int64_t> -auto* to_chars(auto* buf, value_type x) noexcept { - *buf = '-'; - // shifts are necessary to have the numeric_limits::min case - return to_chars(buf + (x < 0), uint64_t(x ^ (x >> 63)) - (x >> 63)); -} - -template - requires(std::same_as, uint32_t>) -JSONIFIER_INLINE string_buffer_ptr to_chars_u64_len_8_new(string_buffer_ptr buf, value_type value) noexcept { - /* 8 digits: aabbccdd */ - const uint32_t aabb = uint32_t((uint64_t(value) * 109951163) >> 40); /* (value / 10000) */ - const uint32_t ccdd = value - aabb * 10000; /* (value % 10000) */ - const uint32_t aa = (aabb * 5243) >> 19; /* (aabb / 100) */ - const uint32_t cc = (ccdd * 5243) >> 19; /* (ccdd / 100) */ - const uint32_t bb = aabb - aa * 100; /* (aabb % 100) */ - const uint32_t dd = ccdd - cc * 100; /* (ccdd % 100) */ - std::memcpy(buf, char_table + aa * 2, 2); - std::memcpy(buf + 2, jsonifier::internal::int_tables<>::charTable02 + bb, 2); - std::memcpy(buf + 4, jsonifier::internal::int_tables<>::charTable02 + cc, 2); - std::memcpy(buf + 6, jsonifier::internal::int_tables<>::charTable02 + dd, 2); - return buf + 8; -} - -template - requires(std::same_as, uint32_t>) -JSONIFIER_INLINE string_buffer_ptr to_chars_u64_len_4_new(string_buffer_ptr buf, value_type value) noexcept { - /* 4 digits: aabb */ - const uint32_t aa = (value * 5243) >> 19; /* (value / 100) */ - const uint32_t bb = value - aa * 100; /* (value % 100) */ - std::memcpy(buf, char_table + aa * 2, 2); - std::memcpy(buf + 2, jsonifier::internal::int_tables<>::charTable02 + bb, 2); - return buf + 4; -} -template - requires(std::same_as, uint32_t>) -JSONIFIER_INLINE string_buffer_ptr to_chars_u64_len_1_8_new(string_buffer_ptr buf, value_type value) noexcept { - uint32_t aa, bb, cc, dd, aabb, bbcc, ccdd, lz; - - if (value < 100) { /* 1-2 digits: aa */ - lz = value < 10; - std::memcpy(buf, char_table + value * 2 + lz, 2); - buf -= lz; - return buf + 2; - } else if (value < 10000) { /* 3-4 digits: aabb */ - aa = (value * 5243) >> 19; /* (value / 100) */ - bb = value - aa * 100; /* (value % 100) */ - lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, jsonifier::internal::int_tables<>::charTable02 + bb, 2); - return buf + 4; - } else if (value < 1000000) { /* 5-6 digits: aabbcc */ - aa = uint32_t((uint64_t(value) * 429497) >> 32); /* (value / 10000) */ - bbcc = value - aa * 10000; /* (value % 10000) */ - bb = (bbcc * 5243) >> 19; /* (bbcc / 100) */ - cc = bbcc - bb * 100; /* (bbcc % 100) */ - lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, jsonifier::internal::int_tables<>::charTable02 + bb, 2); - std::memcpy(buf + 4, jsonifier::internal::int_tables<>::charTable02 + cc, 2); - return buf + 6; - } else { /* 7-8 digits: aabbccdd */ - /* (value / 10000) */ - aabb = uint32_t((uint64_t(value) * 109951163) >> 40); - ccdd = value - aabb * 10000; /* (value % 10000) */ - aa = (aabb * 5243) >> 19; /* (aabb / 100) */ - cc = (ccdd * 5243) >> 19; /* (ccdd / 100) */ - bb = aabb - aa * 100; /* (aabb % 100) */ - dd = ccdd - cc * 100; /* (ccdd % 100) */ - lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, jsonifier::internal::int_tables<>::charTable02 + bb, 2); - std::memcpy(buf + 4, jsonifier::internal::int_tables<>::charTable02 + cc, 2); - std::memcpy(buf + 6, jsonifier::internal::int_tables<>::charTable02 + dd, 2); - return buf + 8; + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::template runBenchmark<"alternative-digit-count-32", "cyan">([&]() { + uint64_t currentCount{}; + for (size_t x = 0; x < count; ++x) { + auto newCount = alternative_digit_count(randomIntegers[x]); + results[x] = newCount; + currentCount += static_cast(newCount); + } + bnch_swt::doNotOptimizeAway(currentCount); + return currentCount; + }); + for (size_t x = 0; x < count; ++x) { + if (results[x] != counts[x]) { + std::cout << "alternative-digit-count-32 failed to count the integers of value: " << randomIntegers[x] << ", instead it counted: " << results[x] + << ", when it should be: " << counts[x] << std::endl; + } } -} -template - requires(std::same_as, uint32_t>) -JSONIFIER_INLINE string_buffer_ptr to_chars_u64_len_5_8_new(string_buffer_ptr buf, value_type value) noexcept { - if (value < 1000000) { /* 5-6 digits: aabbcc */ - const uint32_t aa = uint32_t((uint64_t(value) * 429497) >> 32); /* (value / 10000) */ - const uint32_t bbcc = value - aa * 10000; /* (value % 10000) */ - const uint32_t bb = (bbcc * 5243) >> 19; /* (bbcc / 100) */ - const uint32_t cc = bbcc - bb * 100; /* (bbcc % 100) */ - const uint32_t lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, jsonifier::internal::int_tables<>::charTable02 + bb, 2); - std::memcpy(buf + 4, jsonifier::internal::int_tables<>::charTable02 + cc, 2); - return buf + 6; - } else { /* 7-8 digits: aabbccdd */ - /* (value / 10000) */ - const uint32_t aabb = uint32_t((uint64_t(value) * 109951163) >> 40); - const uint32_t ccdd = value - aabb * 10000; /* (value % 10000) */ - const uint32_t aa = (aabb * 5243) >> 19; /* (aabb / 100) */ - const uint32_t cc = (ccdd * 5243) >> 19; /* (ccdd / 100) */ - const uint32_t bb = aabb - aa * 100; /* (aabb % 100) */ - const uint32_t dd = ccdd - cc * 100; /* (ccdd % 100) */ - const uint32_t lz = aa < 10; - std::memcpy(buf, char_table + aa * 2 + lz, 2); - buf -= lz; - std::memcpy(buf + 2, jsonifier::internal::int_tables<>::charTable02 + bb, 2); - std::memcpy(buf + 4, jsonifier::internal::int_tables<>::charTable02 + cc, 2); - std::memcpy(buf + 6, jsonifier::internal::int_tables<>::charTable02 + dd, 2); - return buf + 8; + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::template runBenchmark<"fast-digit-count-32", "cyan">([&]() { + uint64_t currentCount{}; + for (size_t x = 0; x < count; ++x) { + auto newCount = fast_digit_count(randomIntegers[x]); + results[x] = newCount; + currentCount += static_cast(newCount); + } + bnch_swt::doNotOptimizeAway(currentCount); + return currentCount; + }); + for (size_t x = 0; x < count; ++x) { + if (results[x] != counts[x]) { + std::cout << "fast-digit-count-32 failed to count the integers of value: " << randomIntegers[x] << ", instead it counted: " << results[x] + << ", when it should be: " << counts[x] << std::endl; + } } -} -template - requires(std::same_as, uint64_t>) -JSONIFIER_INLINE string_buffer_ptr to_chars_new(string_buffer_ptr buf, value_type value) noexcept { - if (value < 100000000) { /* 1-8 digits */ - buf = to_chars_u64_len_1_8(buf, uint32_t(value)); - return buf; - } else if (value < 100000000ull * 100000000ull) { /* 9-16 digits */ - const uint64_t hgh = value / 100000000; - const auto low = uint32_t(value - hgh * 100000000); /* (value % 100000000) */ - buf = to_chars_u64_len_1_8_new(buf, uint32_t(hgh)); - buf = to_chars_u64_len_8_new(buf, low); - return buf; - } else { /* 17-20 digits */ - const uint64_t tmp = value / 100000000; - const auto low = uint32_t(value - tmp * 100000000); /* (value % 100000000) */ - const auto hgh = uint32_t(tmp / 10000); - const auto mid = uint32_t(tmp - hgh * 10000); /* (tmp % 10000) */ - buf = to_chars_u64_len_5_8_new(buf, hgh); - buf = to_chars_u64_len_4_new(buf, mid); - buf = to_chars_u64_len_8_new(buf, low); - return buf; + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::template runBenchmark<"digit-count-32", "cyan">([&]() { + uint64_t currentCount{}; + for (size_t x = 0; x < count; ++x) { + auto newCount = alternative_digit_count(randomIntegers[x]); + results[x] = newCount; + currentCount += static_cast(newCount); + } + bnch_swt::doNotOptimizeAway(currentCount); + return currentCount; + }); + for (size_t x = 0; x < count; ++x) { + if (results[x] != counts[x]) { + std::cout << "digit-count-32 failed to count the integers of value: " << randomIntegers[x] << ", instead it counted: " << results[x] + << ", when it should be: " << counts[x] << std::endl; + } } -} - -static constexpr char radix_100_table[] = { '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', - '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', '3', '0', - '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', - '8', '4', '9', '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', - '6', '6', '6', '7', '6', '8', '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', - '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' }; - -char* itoa_always_10_digits(std::uint64_t n, char* buffer) { - constexpr auto mask = (std::uint64_t(1) << 57) - 1; - auto y = n * std::uint64_t(1441151881); - std::memcpy(buffer + 0, radix_100_table + int(y >> 57) * 2, 2); - y &= mask; - y *= 100; - std::memcpy(buffer + 2, radix_100_table + int(y >> 57) * 2, 2); - y &= mask; - y *= 100; - std::memcpy(buffer + 4, radix_100_table + int(y >> 57) * 2, 2); - y &= mask; - y *= 100; - std::memcpy(buffer + 6, radix_100_table + int(y >> 57) * 2, 2); - y &= mask; - y *= 100; - std::memcpy(buffer + 8, radix_100_table + int(y >> 57) * 2, 2); - - return buffer + 10; -} - -template - requires std::same_as, int64_t> -JSONIFIER_INLINE string_buffer_ptr to_chars_new(string_buffer_ptr buf, value_type x) noexcept { - *buf = '-'; - // shifts are necessary to have the numeric_limits::min case - return to_chars(buf + (x < 0), uint64_t(x ^ (x >> 63)) - (x >> 63)); -} -uint64_t generateRandomIntegerByLength(uint32_t digitLength) { - std::uniform_int_distribution distLength(1, digitLength); - std::uniform_int_distribution dist01(1, 9); - std::uniform_int_distribution dist02(0, 9); - std::random_device rd; - std::mt19937_64 gen(rd()); - //digitLength = distLength(gen); - - char buffer[22]{}; - buffer[0] = static_cast(dist01(gen) + '0'); - - for (uint64_t x = 1ull; x < digitLength; ++x) { - buffer[x] = static_cast(dist02(gen) + '0'); + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::template runBenchmark<"rtc-32-bit", "cyan">([&]() { + uint64_t currentCount{}; + for (size_t x = 0; x < count; ++x) { + auto newCount = fastDigitCount(randomIntegers[x]); + results[x] = newCount; + currentCount += static_cast(newCount); + } + bnch_swt::doNotOptimizeAway(currentCount); + return currentCount; + }); + for (size_t x = 0; x < count; ++x) { + if (results[x] != counts[x]) { + std::cout << "rtc-32-bit failed to count the integers of value: " << randomIntegers[x] << ", instead it counted: " << results[x] << ", when it should be: " << counts[x] + << std::endl; + } } - buffer[digitLength] = '\0'; - return std::strtoull(buffer, nullptr, 10); + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::printResults(true, true); } -template std::vector generateRandomIntegers(uint64_t count, uint64_t maxLength = 0) { - std::random_device rd; - std::mt19937_64 gen(rd()); - std::uniform_int_distribution lengthNewGen(1, 20); - std::vector randomNumbers; - - for (uint64_t value = 0ull; value < count; ++value) { - uint64_t newValue{ generateRandomIntegerByLength(maxLength == 0 ? lengthNewGen(gen) : maxLength) }; - randomNumbers.push_back(newValue); +template JSONIFIER_INLINE void testFunction64() { + auto randomIntegers = generateRandomIntegers(count, sizeof(uint64_t) == 4 ? 10 : 20); + std::vector counts{}; + std::vector results{}; + counts.resize(count); + results.resize(count); + for (size_t x = 0; x < count; ++x) { + counts[x] = digit_count(randomIntegers[x]); } - return randomNumbers; -} - -template auto generateVectorOfVectors(uint64_t count01, uint64_t count02, uint64_t lengthNew) { - std::vector> returnValues{}; - for (uint64_t x = 0ull; x < count01; ++x) { - returnValues.emplace_back(generateRandomIntegers(count02, lengthNew)); + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::template runBenchmark<"fast-digit-count-64", "cyan">([&]() { + uint64_t currentCount{}; + for (size_t x = 0; x < count; ++x) { + auto newCount = fast_digit_count(randomIntegers[x]); + results[x] = newCount; + currentCount += static_cast(newCount); + } + bnch_swt::doNotOptimizeAway(currentCount); + return currentCount; + }); + for (size_t x = 0; x < count; ++x) { + if (results[x] != counts[x]) { + std::cout << "fast-digit-count-64 failed to count the integers of value: " << randomIntegers[x] << ", instead it counted: " << results[x] + << ", when it should be: " << counts[x] << std::endl; + } } - return returnValues; -} -static constexpr auto maxIterations{ 300 }; -static constexpr auto measuredIterations{ 20 }; - -JSONIFIER_INLINE_VARIABLE uint8_t digitCounts[]{ 19, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 11, 11, 11, 10, 10, 10, - 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1 }; - -JSONIFIER_INLINE_VARIABLE uint64_t digitCountThresholds[]{ 0ull, 9ull, 99ull, 999ull, 9999ull, 99999ull, 999999ull, 9999999ull, 99999999ull, 999999999ull, 9999999999ull, - 99999999999ull, 999999999999ull, 9999999999999ull, 99999999999999ull, 999999999999999ull, 9999999999999999ull, 99999999999999999ull, 999999999999999999ull, - 9999999999999999999ull }; - -JSONIFIER_INLINE uint64_t fastDigitCount(const uint64_t inputValue) { - const uint64_t originalDigitCount{ digitCounts[jsonifier::simd::lzcnt(inputValue)] }; - return originalDigitCount + static_cast(inputValue > digitCountThresholds[originalDigitCount]); -} - -template BNCH_SWT_INLINE void testFunction() { - std::vector> testValues{ generateVectorOfVectors(maxIterations * measuredIterations, count, lengthNew) }; - std::vector> testValues00{}; - std::vector> testValues01{}; - testValues01.resize(maxIterations * measuredIterations); - for (uint64_t x = 0ull; x < maxIterations * measuredIterations; ++x) { - testValues01[x].resize(count); - } - testValues00.resize(maxIterations * measuredIterations); - testValues01.resize(maxIterations * measuredIterations); - for (uint64_t x = 0ull; x < maxIterations * measuredIterations; ++x) { - for (uint64_t y = 0ull; y < count; ++y) { - testValues00[x].emplace_back(std::to_string(testValues[x][y])); + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::template runBenchmark<"alternative-digit-count-64", "cyan">([&]() { + uint64_t currentCount{}; + for (size_t x = 0; x < count; ++x) { + auto newCount = alternative_digit_count(randomIntegers[x]); + results[x] = newCount; + currentCount += static_cast(newCount); + } + bnch_swt::doNotOptimizeAway(currentCount); + return currentCount; + }); + for (size_t x = 0; x < count; ++x) { + if (results[x] != counts[x]) { + std::cout << "alternative-digit-count-64 failed to count the integers of value: " << randomIntegers[x] << ", instead it counted: " << results[x] + << ", when it should be: " << counts[x] << std::endl; } } - uint64_t currentIteration{}; - std::vector> newerStrings{}; - newerStrings.resize(maxIterations * measuredIterations); - srand(std::chrono::high_resolution_clock::now().time_since_epoch().count()); - bnch_swt::benchmark_stage::template runBenchmark<"glz::to_chars", "CYAN">([&] { - uint64_t bytesProcessed{}; - for (uint64_t x = 0ull; x < count; ++x) { - auto newPtr = to_chars(newerStrings[currentIteration].data(), testValues[currentIteration][x]); - bytesProcessed += testValues00[currentIteration][x].size(); - testValues01[currentIteration][x] = std::string{ newerStrings[currentIteration].data(), static_cast(newPtr - newerStrings[currentIteration].data()) }; + + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::template runBenchmark<"digit-count-64", "cyan">([&]() { + uint64_t currentCount{}; + for (size_t x = 0; x < count; ++x) { + auto newCount = alternative_digit_count(randomIntegers[x]); + results[x] = newCount; + currentCount += static_cast(newCount); } - bnch_swt::doNotOptimizeAway(bytesProcessed); - ++currentIteration; - return bytesProcessed; + bnch_swt::doNotOptimizeAway(currentCount); + return currentCount; }); - std::cout << "TOTAL ITERATIONS: " << currentIteration << std::endl; - for (uint64_t x = 0ull; x < currentIteration; ++x) { - for (uint64_t y = 0ull; y < count; ++y) { - if (testValues00[x][y] != testValues01[x][y]) { - std::cout << "GLZ FAILED TO SERIALIZE THIS VALUE: " << testValues00[x][y] << std::endl; - std::cout << "GLZ FAILED TO SERIALIZE THIS VALUE (RAW): " << testValues[x][y] << std::endl; - std::cout << "GLZ FAILED TO SERIALIZE THIS VALUE-SIZE (RAW): " << testValues00[x][y].size() << std::endl; - std::cout << "INSTEAD IT PRODUCED THIS VALUE-SIZE: " << testValues01[x][y].size() << std::endl; - std::cout << "INSTEAD IT PRODUCED THIS VALUE: " << testValues01[x][y] << std::endl; - } + for (size_t x = 0; x < count; ++x) { + if (results[x] != counts[x]) { + std::cout << "digit-count-64 failed to count the integers of value: " << randomIntegers[x] << ", instead it counted: " << results[x] + << ", when it should be: " << counts[x] << std::endl; } } - currentIteration = 0ull; - bnch_swt::benchmark_stage::template runBenchmark<"jsonifier::internal::toChars", "CYAN">([&] { - uint64_t bytesProcessed{}; - for (uint64_t x = 0ull; x < count; ++x) { - auto newPtr = jsonifier::internal::toChars(newerStrings[currentIteration].data(), testValues[currentIteration][x]); - bytesProcessed += testValues00[currentIteration][x].size(); - testValues01[currentIteration][x] = std::string{ newerStrings[currentIteration].data(), static_cast(newPtr - newerStrings[currentIteration].data()) }; + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::template runBenchmark<"rtc-64-bit", "cyan">([&]() { + uint64_t currentCount{}; + for (size_t x = 0; x < count; ++x) { + auto newCount = fastDigitCount(randomIntegers[x]); + results[x] = newCount; + currentCount += static_cast(newCount); } - bnch_swt::doNotOptimizeAway(bytesProcessed); - ++currentIteration; - return bytesProcessed; + bnch_swt::doNotOptimizeAway(currentCount); + return currentCount; }); - for (uint64_t x = 0ull; x < currentIteration; ++x) { - for (uint64_t y = 0ull; y < count; ++y) { - if (testValues00[x][y] != testValues01[x][y]) { - std::cout << "jsonifier::internal::toChars FAILED TO SERIALIZE THIS VALUE: " << testValues00[x][y] << std::endl; - std::cout << "jsonifier::internal::toChars FAILED TO SERIALIZE THIS VALUE (RAW): " << testValues[x][y] << std::endl; - std::cout << "jsonifier::internal::toChars FAILED TO SERIALIZE THIS VALUE-SIZE (RAW): " << testValues00[x][y].size() << std::endl; - std::cout << "INSTEAD IT PRODUCED THIS VALUE-SIZE: " << testValues01[x][y].size() << std::endl; - std::cout << "INSTEAD IT PRODUCED THIS VALUE: " << testValues01[x][y] << std::endl; - } + for (size_t x = 0; x < count; ++x) { + if (results[x] != counts[x]) { + std::cout << "rtc-64-bit failed to count the integers of value: " << randomIntegers[x] << ", instead it counted: " << results[x] << ", when it should be: " << counts[x] + << std::endl; } } - currentIteration = 0ull; - - bnch_swt::benchmark_stage::printResults(true, true); + bnch_swt::benchmark_stage<"compare-decimal-counting-functions-" + name, 20, 4>::printResults(true, true); } int main() { - testFunction<512, 1, uint64_t, "int-to-string-comparisons-1">(); - testFunction<512, 1, int64_t, "int-to-string-comparisons-1">(); - testFunction<522, 2, uint64_t, "int-to-string-comparisons-2">(); - testFunction<522, 2, int64_t, "int-to-string-comparisons-2">(); - testFunction<542, 4, uint64_t, "int-to-string-comparisons-4">(); - testFunction<542, 4, int64_t, "int-to-string-comparisons-4">(); - testFunction<582, 8, uint64_t, "int-to-string-comparisons-8">(); - testFunction<582, 8, int64_t, "int-to-string-comparisons-8">(); - testFunction<5162, 16, uint64_t, "int-to-string-comparisons-16">(); - testFunction<5162, 16, int64_t, "int-to-string-comparisons-16">(); - testFunction<512, 0, uint64_t, "int-to-string-comparisons-x">(); - testFunction<512, 0, int64_t, "int-to-string-comparisons-x">(); - return 0ull; + testFunction32<1000000, "uint32-test-1000000">(); + testFunction64<1000000, "uint64-test-1000000">(); + return 0; } \ No newline at end of file diff --git a/Include/BnchSwt/BenchmarkSuite.hpp b/Include/BnchSwt/BenchmarkSuite.hpp index e650e970..38c1f654 100644 --- a/Include/BnchSwt/BenchmarkSuite.hpp +++ b/Include/BnchSwt/BenchmarkSuite.hpp @@ -28,34 +28,29 @@ #endif #include -#include #include +#include #include #include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include namespace bnch_swt { namespace internal { + template struct arg_passer : public arg_types... { + template constexpr arg_passer(arg_types_new&&... argTupleNew) : arg_types{ std::forward(argTupleNew)... }... {} + + template BNCH_SWT_INLINE auto impl(function_type&& function) { + return (function(*static_cast(this)...)); + } + }; + + template arg_passer(arg_types...) -> arg_passer; + template BNCH_SWT_INLINE constexpr value_type01 max(value_type01 val01, value_type02 val02) { return val01 > static_cast(val02) ? val01 : static_cast(val02); } @@ -68,28 +63,28 @@ namespace bnch_swt { template struct benchmark_stage { static_assert(maxExecutionCount % measuredIterationCount == 0, "Sorry, but please enter a maxExecutionCount that is divisible by measuredIterationCount."); - inline static std::unordered_map results{}; + inline static thread_local std::unordered_map results{}; BNCH_SWT_INLINE static void printResults(bool showComparison = true, bool showMetrics = true) { std::vector resultsNew{}; - for (auto& [key, value]: results) { + for (const auto& [key, value]: results) { resultsNew.emplace_back(value); } if (resultsNew.size() > 0) { std::sort(resultsNew.begin(), resultsNew.end(), std::greater{}); std::cout << "Performance Metrics for: " << stageNameNew.operator std::string_view() << std::endl; if (showMetrics) { - for (auto& value: resultsNew) { + for (const auto& value: resultsNew) { std::cout << "Metrics for: " << value.name << std::endl; std::cout << std::fixed << std::setprecision(2); - static constexpr auto printMetric = [](const std::string& label, const value_type& value) { + static constexpr auto printMetric = [](const std::string_view& label, const value_type& valueNew) { if constexpr (internal::optional_t) { - if (value.has_value()) { - std::cout << std::left << std::setw(60ull) << label << ": " << value.value() << std::endl; + if (valueNew.has_value()) { + std::cout << std::left << std::setw(60ull) << label << ": " << valueNew.value() << std::endl; } } else { - std::cout << std::left << std::setw(60ull) << label << ": " << value << std::endl; + std::cout << std::left << std::setw(60ull) << label << ": " << valueNew << std::endl; } }; printMetric("Total Iterations to Stabilize", value.totalIterationCount); @@ -125,63 +120,54 @@ namespace bnch_swt { } } -#if defined(NDEBUG) - static constexpr double threshold{ 5.0f }; -#else - static constexpr double threshold{ 10.0f }; -#endif - template - BNCH_SWT_INLINE static const performance_metrics& runBenchmarkWithPrep(prep_function_type&& prepFunctionNew, function_type&& functionNew, arg_types&&... args) { + template + BNCH_SWT_INLINE static performance_metrics runBenchmark(function_type&& functionNew, arg_types&&... args) { static constexpr string_literal subjectName{ subjectNameNew }; static_assert(std::convertible_to, size_t>, - "Sorry, but the lambda passed to runBenchmarkWithPrep() must return a size_t, reflecting the number of bytes processed!"); - std::remove_cvref_t prepFunctionNewer{ std::forward(prepFunctionNew) }; + "Sorry, but the lambda passed to runBenchmark() must return a size_t, reflecting the number of bytes processed!"); std::remove_cvref_t functionNewer{ std::forward(functionNew) }; internal::event_collector events{}; internal::cache_clearer cacheClearer{}; - performance_metrics lowestResultsTemp{}; + performance_metrics lowestResults{}; performance_metrics resultsTemp{}; - size_t currentGlobalIndex{}; - cacheClearer.evictCaches(); - for (size_t x = 0; x < maxExecutionCount && currentGlobalIndex < maxExecutionCount; ++x, ++currentGlobalIndex) { - prepFunctionNewer(); - events.start(functionNewer, std::forward(args)...); + size_t currentGlobalIndex{ measuredIterationCount }; + for (size_t x = 0; x < maxExecutionCount; ++x) { + cacheClearer.evictCaches(); + events.run(functionNewer, std::forward(args)...); } - currentGlobalIndex = measuredIterationCount; for (size_t x = 0; x < maxExecutionCount - measuredIterationCount; ++x, ++currentGlobalIndex) { - auto newPtr = events.data() + x; - resultsTemp = collectMetrics(std::span{ newPtr, measuredIterationCount }, currentGlobalIndex); - lowestResultsTemp = resultsTemp.throughputPercentageDeviation < lowestResultsTemp.throughputPercentageDeviation ? (resultsTemp) : lowestResultsTemp; + auto newPtr = events.data() + x; + resultsTemp = collectMetrics(std::span{ newPtr, measuredIterationCount }, currentGlobalIndex); + lowestResults = resultsTemp.throughputPercentageDeviation < lowestResults.throughputPercentageDeviation ? resultsTemp : lowestResults; } - auto& resultsTempNew = results[subjectName.operator std::string()]; - resultsTempNew = lowestResultsTemp; - return resultsTempNew; + results[subjectName.operator std::string_view()] = lowestResults; + return results[subjectName.operator std::string_view()]; } - template - BNCH_SWT_INLINE static const performance_metrics& runBenchmark(function_type&& functionNew, arg_types&&... args) { + template + BNCH_SWT_INLINE static performance_metrics runBenchmarkWithPrep(prep_function_type&& prepFunctionNew, function_type&& functionNew, arg_types&&... args) { static constexpr string_literal subjectName{ subjectNameNew }; static_assert(std::convertible_to, size_t>, - "Sorry, but the lambda passed to runBenchmark() must return a size_t, reflecting the number of bytes processed!"); + "Sorry, but the lambda passed to runBenchmarkWithPrep() must return a size_t, reflecting the number of bytes processed!"); + std::remove_cvref_t prepFunctionNewer{ std::forward(prepFunctionNew) }; std::remove_cvref_t functionNewer{ std::forward(functionNew) }; internal::event_collector events{}; internal::cache_clearer cacheClearer{}; - performance_metrics lowestResultsTemp{}; + performance_metrics lowestResults{}; performance_metrics resultsTemp{}; - size_t currentGlobalIndex{}; - cacheClearer.evictCaches(); - for (size_t x = 0; x < maxExecutionCount && currentGlobalIndex < maxExecutionCount; ++x, ++currentGlobalIndex) { - events.start(functionNewer, std::forward(args)...); + size_t currentGlobalIndex{ measuredIterationCount }; + for (size_t x = 0; x < maxExecutionCount; ++x) { + prepFunctionNewer(); + cacheClearer.evictCaches(); + events.run(functionNewer, std::forward(args)...); } - currentGlobalIndex = measuredIterationCount; for (size_t x = 0; x < maxExecutionCount - measuredIterationCount; ++x, ++currentGlobalIndex) { - auto newPtr = events.data() + x; - resultsTemp = collectMetrics(std::span{ newPtr, measuredIterationCount }, currentGlobalIndex); - lowestResultsTemp = resultsTemp.throughputPercentageDeviation < lowestResultsTemp.throughputPercentageDeviation ? (resultsTemp) : lowestResultsTemp; + auto newPtr = events.data() + x; + resultsTemp = collectMetrics(std::span{ newPtr, measuredIterationCount }, currentGlobalIndex); + lowestResults = resultsTemp.throughputPercentageDeviation < lowestResults.throughputPercentageDeviation ? resultsTemp : lowestResults; } - auto& resultsTempNew = results[subjectName.operator std::string()]; - resultsTempNew = lowestResultsTemp; - return resultsTempNew; + results[subjectName.operator std::string_view()] = lowestResults; + return results[subjectName.operator std::string_view()]; } }; diff --git a/Include/BnchSwt/CacheClearer.hpp b/Include/BnchSwt/CacheClearer.hpp index 345f94be..035d77ba 100644 --- a/Include/BnchSwt/CacheClearer.hpp +++ b/Include/BnchSwt/CacheClearer.hpp @@ -24,6 +24,7 @@ #pragma once #include +#include #if defined(BNCH_SWT_WIN) #include @@ -34,8 +35,6 @@ #elif defined(BNCH_SWT_LINUX) #include - #include - #include #include #include #include @@ -44,7 +43,6 @@ #include #include #include - #include #include #endif diff --git a/Include/BnchSwt/Counters/AppleArmPerfEvents.hpp b/Include/BnchSwt/Counters/AppleArmPerfEvents.hpp index e4951407..69c289d5 100644 --- a/Include/BnchSwt/Counters/AppleArmPerfEvents.hpp +++ b/Include/BnchSwt/Counters/AppleArmPerfEvents.hpp @@ -38,16 +38,10 @@ #if defined(BNCH_SWT_MAC) - #include - #include - #include - #include - #include - #include - #include - #include #include + #include + #include #include #include @@ -1039,7 +1033,7 @@ namespace bnch_swt::internal { } } - // start counting + // run counting if ((ret = kpc_set_counting(classes))) { std::cout << "Failed to set counting: " << ret << "." << std::endl; return (worked = false); @@ -1077,7 +1071,7 @@ namespace bnch_swt::internal { return hasEventsVal; } - template BNCH_SWT_INLINE void start(function_type&& function, arg_types&&... args) { + template BNCH_SWT_INLINE void run(function_type&& function, arg_types&&... args) { if (hasEvents()) { diff = get_counters(); diff --git a/Include/BnchSwt/Counters/LinuxPerfEvents.hpp b/Include/BnchSwt/Counters/LinuxPerfEvents.hpp index 6922d8c8..6800e752 100644 --- a/Include/BnchSwt/Counters/LinuxPerfEvents.hpp +++ b/Include/BnchSwt/Counters/LinuxPerfEvents.hpp @@ -28,16 +28,11 @@ #if defined(BNCH_SWT_LINUX) - #include #include #include #include - #include #include - #include - #include #include - #include #include namespace bnch_swt::internal { @@ -106,7 +101,7 @@ namespace bnch_swt::internal { } } - BNCH_SWT_INLINE void start() { + BNCH_SWT_INLINE void run() { if (fd != -1) { if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) { reportError("ioctl(PERF_EVENT_IOC_RESET)"); @@ -153,16 +148,17 @@ namespace bnch_swt::internal { std::vector results{}; size_t currentIndex{}; BNCH_SWT_INLINE event_collector_type() - : std::vector{ count }, linux_events{ std::vector{ PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, - PERF_COUNT_HW_BRANCH_MISSES, PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES } } {}; + : linux_events{ std::vector{ PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_MISSES, + PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES } }, + std::vector{ count } {}; BNCH_SWT_INLINE bool hasEvents() { return linux_events::isWorking(); } - template BNCH_SWT_INLINE void start(function_type&& function, arg_types&&... args) { + template BNCH_SWT_INLINE void run(function_type&& function, arg_types&&... args) { if (hasEvents()) { - linux_events::start(); + linux_events::run(); } volatile uint64_t cycleStart = rdtsc(); const auto startClock = clock_type::now(); diff --git a/Include/BnchSwt/Counters/WindowsPerfEvents.hpp b/Include/BnchSwt/Counters/WindowsPerfEvents.hpp index 2c9e8583..d5a71c65 100644 --- a/Include/BnchSwt/Counters/WindowsPerfEvents.hpp +++ b/Include/BnchSwt/Counters/WindowsPerfEvents.hpp @@ -28,11 +28,6 @@ #if defined(BNCH_SWT_WIN) #include - - #include - #include - #include - #include #include namespace bnch_swt::internal { @@ -42,7 +37,7 @@ namespace bnch_swt::internal { BNCH_SWT_INLINE event_collector_type() : std::vector{ count } {}; - template BNCH_SWT_INLINE void start(function_type&& function, arg_types&&... args) { + template BNCH_SWT_INLINE void run(function_type&& function, arg_types&&... args) { volatile uint64_t cycleStart = __rdtsc(); const auto startClock = clock_type::now(); std::vector::operator[](currentIndex) diff --git a/Include/BnchSwt/EventCounter.hpp b/Include/BnchSwt/EventCounter.hpp index a3ddf850..e6e587d6 100644 --- a/Include/BnchSwt/EventCounter.hpp +++ b/Include/BnchSwt/EventCounter.hpp @@ -24,22 +24,10 @@ /// Dec 6, 2024 #pragma once -#if !defined(_MSC_VER) - #include -#endif - -#include - -#include -#include -#include -#include -#include -#include - +#include #include #include -#include +#include namespace bnch_swt::internal { @@ -52,9 +40,9 @@ namespace bnch_swt::internal { return std::chrono::duration(elapsed).count(); } - BNCH_SWT_INLINE bool bytesProcessed(double& bytesProcessedNew) const noexcept { + BNCH_SWT_INLINE bool bytesProcessed(uint64_t& bytesProcessedNew) const noexcept { if (bytesProcessedVal.has_value()) { - bytesProcessedNew = static_cast(bytesProcessedVal.value()); + bytesProcessedNew = bytesProcessedVal.value(); return true; } else { return false; diff --git a/Include/BnchSwt/FileLoader.hpp b/Include/BnchSwt/FileLoader.hpp index e0f7e9f6..24cd2945 100644 --- a/Include/BnchSwt/FileLoader.hpp +++ b/Include/BnchSwt/FileLoader.hpp @@ -24,8 +24,8 @@ #pragma once #include -#include #include +#include #include namespace bnch_swt { diff --git a/Include/BnchSwt/Metrics.hpp b/Include/BnchSwt/Metrics.hpp index 972714cb..21199c02 100644 --- a/Include/BnchSwt/Metrics.hpp +++ b/Include/BnchSwt/Metrics.hpp @@ -23,9 +23,7 @@ #pragma once #include -#include #include -#include #include namespace bnch_swt { @@ -45,7 +43,7 @@ namespace bnch_swt { std::optional cyclesPerByte{}; std::optional frequencyGHz{}; double throughputMbPerSec{}; - double bytesProcessed{}; + uint64_t bytesProcessed{}; std::string name{}; double timeInNs{}; @@ -67,9 +65,9 @@ namespace bnch_swt::internal { double throughPutTotal{}; double throughPutAvg{}; double throughPutMin{ std::numeric_limits::max() }; - double bytesProcessed{}; - double bytesProcessedTotal{}; - double bytesProcessedAvg{}; + uint64_t bytesProcessed{}; + uint64_t bytesProcessedTotal{}; + uint64_t bytesProcessedAvg{}; double ns{}; double nsTotal{}; double nsAvg{}; @@ -97,7 +95,7 @@ namespace bnch_swt::internal { if (e.bytesProcessed(bytesProcessed)) { bytesProcessedTotal += bytesProcessed; - double volumeMb = bytesProcessed / (1024. * 1024.); + double volumeMb = static_cast(bytesProcessed) / (1024. * 1024.); throughPut = (volumeMb * 1000000000) / ns; throughPutTotal += throughPut; throughPutMin = throughPut < throughPutMin ? throughPut : throughPutMin; @@ -128,7 +126,7 @@ namespace bnch_swt::internal { } } if (eventsNewer.size() > 0) { - bytesProcessedAvg = bytesProcessedTotal / static_cast(eventsNewer.size()); + bytesProcessedAvg = bytesProcessedTotal / eventsNewer.size(); nsAvg = nsTotal / static_cast(eventsNewer.size()); throughPutAvg = throughPutTotal / static_cast(eventsNewer.size()); cyclesAvg = cyclesTotal / static_cast(eventsNewer.size()); @@ -150,14 +148,14 @@ namespace bnch_swt::internal { } if (std::abs(cyclesAvg) > epsilon) { if (metrics.bytesProcessed > 0) { - metrics.cyclesPerByte.emplace(cyclesAvg / (metrics.bytesProcessed)); + metrics.cyclesPerByte.emplace(cyclesAvg / static_cast(metrics.bytesProcessed)); } metrics.cyclesPerExecution.emplace(cyclesTotal / static_cast(eventsNewer.size())); metrics.frequencyGHz.emplace(cyclesAvg / nsAvg); } if (std::abs(instructionsAvg) > epsilon) { if (metrics.bytesProcessed > 0) { - metrics.instructionsPerByte.emplace(instructionsAvg / (metrics.bytesProcessed)); + metrics.instructionsPerByte.emplace(instructionsAvg / static_cast(metrics.bytesProcessed)); } if (std::abs(cyclesAvg) > epsilon) { metrics.instructionsPerCycle.emplace(instructionsAvg / cyclesAvg); diff --git a/Include/BnchSwt/Printable.hpp b/Include/BnchSwt/Printable.hpp index 5b8c9192..f66cf909 100644 --- a/Include/BnchSwt/Printable.hpp +++ b/Include/BnchSwt/Printable.hpp @@ -22,9 +22,8 @@ /// https://github.com/RealTimeChris/BenchmarkSuite #pragma once -#include -#include #include +#include #include namespace bnch_swt { diff --git a/Include/BnchSwt/StringLiteral.hpp b/Include/BnchSwt/StringLiteral.hpp index f978af3a..fad9f22f 100644 --- a/Include/BnchSwt/StringLiteral.hpp +++ b/Include/BnchSwt/StringLiteral.hpp @@ -26,7 +26,6 @@ #include #include #include -#include #include namespace bnch_swt {