Skip to content

Commit

Permalink
Testing arg-passer.
Browse files Browse the repository at this point in the history
  • Loading branch information
RealTimeChris committed Feb 13, 2025
1 parent 01f56fa commit 32033cc
Show file tree
Hide file tree
Showing 11 changed files with 467 additions and 901 deletions.
1,177 changes: 395 additions & 782 deletions Benchmark/main.cpp

Large diffs are not rendered by default.

106 changes: 46 additions & 60 deletions Include/BnchSwt/BenchmarkSuite.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,34 +28,29 @@
#endif

#include <BnchSwt/StringLiteral.hpp>
#include <BnchSwt/EventCounter.hpp>
#include <BnchSwt/DoNotOptimize.hpp>
#include <BnchSwt/EventCounter.hpp>
#include <BnchSwt/CacheClearer.hpp>
#include <BnchSwt/FileLoader.hpp>
#include <BnchSwt/Printable.hpp>
#include <BnchSwt/Metrics.hpp>
#include <BnchSwt/Config.hpp>
#include <unordered_map>
#include <unordered_set>
#include <string_view>
#include <filesystem>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <numeric>
#include <string>
#include <chrono>
#include <random>
#include <vector>
#include <span>
#include <map>
#include <set>

namespace bnch_swt {

namespace internal {

/// @brief Stores a set of argument objects as base sub-objects (one base per type) so they can later be handed
/// to a callable as individual arguments via impl().
/// @tparam arg_types The types to inherit from; because they are used as base classes they must be class types.
template<typename... arg_types> struct arg_passer : public arg_types... {
	/// @brief Value-initializes every base sub-object.
	/// Kept as a separate overload so that zero-argument construction keeps working now that the forwarding
	/// constructor below requires exactly one argument per base.
	constexpr arg_passer() : arg_types{}... {
	}

	/// @brief Initializes each base sub-object from the argument in the same position.
	/// @param argTupleNew One initializer per entry of arg_types, in the same order.
	/// The pack expansion is positional (base N is built from argument N). Expanding the argument pack inside each
	/// base initializer instead would pass every argument to every base and forward the same rvalue more than once.
	template<typename... arg_types_new> constexpr arg_passer(arg_types_new&&... argTupleNew) : arg_types{ std::forward<arg_types_new>(argTupleNew) }... {
	}

	/// @brief Calls @p function with every base sub-object as an argument, in the order of arg_types.
	/// @param function The callable to invoke; each base sub-object is passed to it as an lvalue.
	/// @return The result of the call, returned by value (the return type is deduced with plain `auto`).
	template<typename function_type> BNCH_SWT_INLINE auto impl(function_type&& function) {
		return (function(*static_cast<arg_types*>(this)...));
	}
};

/// Deduction guide: arg_passer{ a, b } deduces arg_passer<A, B> from the by-value (decayed) argument types.
template<typename... arg_types> arg_passer(arg_types...) -> arg_passer<arg_types...>;

/// @brief Returns the larger of two values, expressed in the type of the first argument.
/// @param val01 First operand; its type is also the return type.
/// @param val02 Second operand; converted to value_type01 with static_cast before it is compared.
/// @return val01 when it is strictly greater than the converted val02, otherwise the converted val02.
template<typename value_type01, typename value_type02> BNCH_SWT_INLINE constexpr value_type01 max(value_type01 val01, value_type02 val02) {
	value_type01 convertedRhs = static_cast<value_type01>(val02);
	if (val01 > convertedRhs) {
		return val01;
	}
	return convertedRhs;
}
Expand All @@ -68,28 +63,28 @@ namespace bnch_swt {

template<string_literal stageNameNew, size_t maxExecutionCount = 200, size_t measuredIterationCount = 25> struct benchmark_stage {
static_assert(maxExecutionCount % measuredIterationCount == 0, "Sorry, but please enter a maxExecutionCount that is divisible by measuredIterationCount.");
inline static std::unordered_map<std::string, performance_metrics> results{};
inline static thread_local std::unordered_map<std::string_view, performance_metrics> results{};

BNCH_SWT_INLINE static void printResults(bool showComparison = true, bool showMetrics = true) {
std::vector<performance_metrics> resultsNew{};
for (auto& [key, value]: results) {
for (const auto& [key, value]: results) {
resultsNew.emplace_back(value);
}
if (resultsNew.size() > 0) {
std::sort(resultsNew.begin(), resultsNew.end(), std::greater<performance_metrics>{});
std::cout << "Performance Metrics for: " << stageNameNew.operator std::string_view() << std::endl;
if (showMetrics) {
for (auto& value: resultsNew) {
for (const auto& value: resultsNew) {
std::cout << "Metrics for: " << value.name << std::endl;
std::cout << std::fixed << std::setprecision(2);

static constexpr auto printMetric = []<typename value_type>(const std::string& label, const value_type& value) {
static constexpr auto printMetric = []<typename value_type>(const std::string_view& label, const value_type& valueNew) {
if constexpr (internal::optional_t<value_type>) {
if (value.has_value()) {
std::cout << std::left << std::setw(60ull) << label << ": " << value.value() << std::endl;
if (valueNew.has_value()) {
std::cout << std::left << std::setw(60ull) << label << ": " << valueNew.value() << std::endl;
}
} else {
std::cout << std::left << std::setw(60ull) << label << ": " << value << std::endl;
std::cout << std::left << std::setw(60ull) << label << ": " << valueNew << std::endl;
}
};
printMetric("Total Iterations to Stabilize", value.totalIterationCount);
Expand Down Expand Up @@ -125,63 +120,54 @@ namespace bnch_swt {
}
}

#if defined(NDEBUG)
static constexpr double threshold{ 5.0f };
#else
static constexpr double threshold{ 10.0f };
#endif
template<string_literal subjectNameNew, string_literal colorNew, typename prep_function_type, typename function_type, internal::not_invocable... arg_types>
BNCH_SWT_INLINE static const performance_metrics& runBenchmarkWithPrep(prep_function_type&& prepFunctionNew, function_type&& functionNew, arg_types&&... args) {
template<string_literal subjectNameNew, string_literal colorNew, typename function_type, internal::not_invocable... arg_types>
BNCH_SWT_INLINE static performance_metrics runBenchmark(function_type&& functionNew, arg_types&&... args) {
static constexpr string_literal subjectName{ subjectNameNew };
static_assert(std::convertible_to<std::invoke_result_t<function_type, arg_types...>, size_t>,
"Sorry, but the lambda passed to runBenchmarkWithPrep() must return a size_t, reflecting the number of bytes processed!");
std::remove_cvref_t<prep_function_type> prepFunctionNewer{ std::forward<prep_function_type>(prepFunctionNew) };
"Sorry, but the lambda passed to runBenchmark() must return a size_t, reflecting the number of bytes processed!");
std::remove_cvref_t<function_type> functionNewer{ std::forward<function_type>(functionNew) };
internal::event_collector<maxExecutionCount> events{};
internal::cache_clearer cacheClearer{};
performance_metrics lowestResultsTemp{};
performance_metrics lowestResults{};
performance_metrics resultsTemp{};
size_t currentGlobalIndex{};
cacheClearer.evictCaches();
for (size_t x = 0; x < maxExecutionCount && currentGlobalIndex < maxExecutionCount; ++x, ++currentGlobalIndex) {
prepFunctionNewer();
events.start(functionNewer, std::forward<arg_types>(args)...);
size_t currentGlobalIndex{ measuredIterationCount };
for (size_t x = 0; x < maxExecutionCount; ++x) {
cacheClearer.evictCaches();
events.run(functionNewer, std::forward<arg_types>(args)...);
}
currentGlobalIndex = measuredIterationCount;
for (size_t x = 0; x < maxExecutionCount - measuredIterationCount; ++x, ++currentGlobalIndex) {
auto newPtr = events.data() + x;
resultsTemp = collectMetrics<subjectName>(std::span{ newPtr, measuredIterationCount }, currentGlobalIndex);
lowestResultsTemp = resultsTemp.throughputPercentageDeviation < lowestResultsTemp.throughputPercentageDeviation ? (resultsTemp) : lowestResultsTemp;
auto newPtr = events.data() + x;
resultsTemp = collectMetrics<subjectName>(std::span{ newPtr, measuredIterationCount }, currentGlobalIndex);
lowestResults = resultsTemp.throughputPercentageDeviation < lowestResults.throughputPercentageDeviation ? resultsTemp : lowestResults;
}
auto& resultsTempNew = results[subjectName.operator std::string()];
resultsTempNew = lowestResultsTemp;
return resultsTempNew;
results[subjectName.operator std::string_view()] = lowestResults;
return results[subjectName.operator std::string_view()];
}

template<string_literal subjectNameNew, string_literal colorNew, typename function_type, internal::not_invocable... arg_types>
BNCH_SWT_INLINE static const performance_metrics& runBenchmark(function_type&& functionNew, arg_types&&... args) {
template<string_literal subjectNameNew, string_literal colorNew, typename prep_function_type, typename function_type, internal::not_invocable... arg_types>
BNCH_SWT_INLINE static performance_metrics runBenchmarkWithPrep(prep_function_type&& prepFunctionNew, function_type&& functionNew, arg_types&&... args) {
static constexpr string_literal subjectName{ subjectNameNew };
static_assert(std::convertible_to<std::invoke_result_t<function_type, arg_types...>, size_t>,
"Sorry, but the lambda passed to runBenchmark() must return a size_t, reflecting the number of bytes processed!");
"Sorry, but the lambda passed to runBenchmarkWithPrep() must return a size_t, reflecting the number of bytes processed!");
std::remove_cvref_t<prep_function_type> prepFunctionNewer{ std::forward<prep_function_type>(prepFunctionNew) };
std::remove_cvref_t<function_type> functionNewer{ std::forward<function_type>(functionNew) };
internal::event_collector<maxExecutionCount> events{};
internal::cache_clearer cacheClearer{};
performance_metrics lowestResultsTemp{};
performance_metrics lowestResults{};
performance_metrics resultsTemp{};
size_t currentGlobalIndex{};
cacheClearer.evictCaches();
for (size_t x = 0; x < maxExecutionCount && currentGlobalIndex < maxExecutionCount; ++x, ++currentGlobalIndex) {
events.start(functionNewer, std::forward<arg_types>(args)...);
size_t currentGlobalIndex{ measuredIterationCount };
for (size_t x = 0; x < maxExecutionCount; ++x) {
prepFunctionNewer();
cacheClearer.evictCaches();
events.run(functionNewer, std::forward<arg_types>(args)...);
}
currentGlobalIndex = measuredIterationCount;
for (size_t x = 0; x < maxExecutionCount - measuredIterationCount; ++x, ++currentGlobalIndex) {
auto newPtr = events.data() + x;
resultsTemp = collectMetrics<subjectName>(std::span{ newPtr, measuredIterationCount }, currentGlobalIndex);
lowestResultsTemp = resultsTemp.throughputPercentageDeviation < lowestResultsTemp.throughputPercentageDeviation ? (resultsTemp) : lowestResultsTemp;
auto newPtr = events.data() + x;
resultsTemp = collectMetrics<subjectName>(std::span{ newPtr, measuredIterationCount }, currentGlobalIndex);
lowestResults = resultsTemp.throughputPercentageDeviation < lowestResults.throughputPercentageDeviation ? resultsTemp : lowestResults;
}
auto& resultsTempNew = results[subjectName.operator std::string()];
resultsTempNew = lowestResultsTemp;
return resultsTempNew;
results[subjectName.operator std::string_view()] = lowestResults;
return results[subjectName.operator std::string_view()];
}
};

Expand Down
4 changes: 1 addition & 3 deletions Include/BnchSwt/CacheClearer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#pragma once

#include <BnchSwt/Config.hpp>
#include <iostream>

#if defined(BNCH_SWT_WIN)
#include <Windows.h>
Expand All @@ -34,8 +35,6 @@

#elif defined(BNCH_SWT_LINUX)
#include <unistd.h>
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <string>
Expand All @@ -44,7 +43,6 @@
#include <libkern/OSCacheControl.h>
#include <sys/sysctl.h>
#include <unistd.h>
#include <iostream>
#include <vector>
#endif

Expand Down
14 changes: 4 additions & 10 deletions Include/BnchSwt/Counters/AppleArmPerfEvents.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,10 @@

#if defined(BNCH_SWT_MAC)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>

#include <mach/mach_time.h>
#include <sys/kdebug.h>
#include <sys/sysctl.h>
#include <sys/kdebug.h>
#include <iostream>
#include <unistd.h>
#include <dlfcn.h>

Expand Down Expand Up @@ -1039,7 +1033,7 @@ namespace bnch_swt::internal {
}
}

// start counting
// start counting
if ((ret = kpc_set_counting(classes))) {
std::cout << "Failed to set counting: " << ret << "." << std::endl;
return (worked = false);
Expand Down Expand Up @@ -1077,7 +1071,7 @@ namespace bnch_swt::internal {
return hasEventsVal;
}

template<typename function_type, typename... arg_types> BNCH_SWT_INLINE void start(function_type&& function, arg_types&&... args) {
template<typename function_type, typename... arg_types> BNCH_SWT_INLINE void run(function_type&& function, arg_types&&... args) {

if (hasEvents()) {
diff = get_counters();
Expand Down
16 changes: 6 additions & 10 deletions Include/BnchSwt/Counters/LinuxPerfEvents.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,11 @@

#if defined(BNCH_SWT_LINUX)

#include <linux/perf_event.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/ioctl.h>
#include <stdexcept>
#include <unistd.h>
#include <iostream>
#include <libgen.h>
#include <cstring>
#include <cerrno>
#include <vector>

namespace bnch_swt::internal {
Expand Down Expand Up @@ -106,7 +101,7 @@ namespace bnch_swt::internal {
}
}

BNCH_SWT_INLINE void start() {
BNCH_SWT_INLINE void run() {
if (fd != -1) {
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
reportError("ioctl(PERF_EVENT_IOC_RESET)");
Expand Down Expand Up @@ -153,16 +148,17 @@ namespace bnch_swt::internal {
std::vector<uint64_t> results{};
size_t currentIndex{};
BNCH_SWT_INLINE event_collector_type()
: std::vector<event_count>{ count }, linux_events{ std::vector<int32_t>{ PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
PERF_COUNT_HW_BRANCH_MISSES, PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES } } {};
: linux_events{ std::vector<int32_t>{ PERF_COUNT_HW_CPU_CYCLES, PERF_COUNT_HW_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_MISSES,
PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES } },
std::vector<event_count>{ count } {};

/// @brief Reports the result of linux_events::isWorking() as a bool.
/// @return true when linux_events::isWorking() evaluates to true, false otherwise.
BNCH_SWT_INLINE bool hasEvents() {
	const bool eventsWorking = linux_events::isWorking();
	return eventsWorking;
}

template<typename function_type, typename... arg_types> BNCH_SWT_INLINE void start(function_type&& function, arg_types&&... args) {
template<typename function_type, typename... arg_types> BNCH_SWT_INLINE void run(function_type&& function, arg_types&&... args) {
if (hasEvents()) {
linux_events::start();
linux_events::run();
}
volatile uint64_t cycleStart = rdtsc();
const auto startClock = clock_type::now();
Expand Down
7 changes: 1 addition & 6 deletions Include/BnchSwt/Counters/WindowsPerfEvents.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@
#if defined(BNCH_SWT_WIN)

#include <intrin.h>

#include <stdexcept>
#include <iostream>
#include <cstring>
#include <cerrno>
#include <vector>

namespace bnch_swt::internal {
Expand All @@ -42,7 +37,7 @@ namespace bnch_swt::internal {

/// Default constructor: brace-initializes the std::vector<event_count> base with `count`.
/// NOTE(review): presumably this is meant to pre-size the vector to `count` entries. Whether `{ count }` selects
/// the size constructor or the initializer_list constructor depends on event_count, which is not visible here - confirm.
BNCH_SWT_INLINE event_collector_type() : std::vector<event_count>{ count } {};

template<typename function_type, typename... arg_types> BNCH_SWT_INLINE void start(function_type&& function, arg_types&&... args) {
template<typename function_type, typename... arg_types> BNCH_SWT_INLINE void run(function_type&& function, arg_types&&... args) {
volatile uint64_t cycleStart = __rdtsc();
const auto startClock = clock_type::now();
std::vector<event_count>::operator[](currentIndex)
Expand Down
20 changes: 4 additions & 16 deletions Include/BnchSwt/EventCounter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,10 @@
/// Dec 6, 2024
#pragma once

#if !defined(_MSC_VER)
#include <dirent.h>
#endif

#include <BnchSwt/Config.hpp>

#include <cinttypes>
#include <optional>
#include <cstring>
#include <chrono>
#include <vector>
#include <cctype>

#include <BnchSwt/Counters/AppleArmPerfEvents.hpp>
#include <BnchSwt/Counters/WindowsPerfEvents.hpp>
#include <BnchSwt/Counters/LinuxPerfEvents.hpp>
#include <BnchSwt/Counters/AppleArmPerfEvents.hpp>
#include <optional>

namespace bnch_swt::internal {

Expand All @@ -52,9 +40,9 @@ namespace bnch_swt::internal {
return std::chrono::duration<double, std::nano>(elapsed).count();
}

BNCH_SWT_INLINE bool bytesProcessed(double& bytesProcessedNew) const noexcept {
BNCH_SWT_INLINE bool bytesProcessed(uint64_t& bytesProcessedNew) const noexcept {
if (bytesProcessedVal.has_value()) {
bytesProcessedNew = static_cast<double>(bytesProcessedVal.value());
bytesProcessedNew = bytesProcessedVal.value();
return true;
} else {
return false;
Expand Down
2 changes: 1 addition & 1 deletion Include/BnchSwt/FileLoader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
#pragma once

#include <BnchSwt/Config.hpp>
#include <sstream>
#include <filesystem>
#include <sstream>
#include <fstream>

namespace bnch_swt {
Expand Down
Loading

0 comments on commit 32033cc

Please sign in to comment.