-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
276 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
120 changes: 120 additions & 0 deletions
120
include/alpaka/test/KernelExecutionBenchmarkFixture.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
/* Copyright 2022 Benjamin Worpitz, Andrea Bocci, Bernhard Manfred Gruber | ||
* | ||
* This file is part of alpaka. | ||
* | ||
* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA | ||
# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA! | ||
#endif | ||
|
||
#if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP | ||
# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP! | ||
#endif | ||
|
||
#include <alpaka/test/Check.hpp> | ||
#include <alpaka/test/queue/Queue.hpp> | ||
|
||
#include <catch2/catch.hpp> | ||
|
||
#include <string> | ||
#include <utility> | ||
|
||
namespace alpaka::test | ||
{ | ||
//! The fixture for executing a kernel on a given accelerator. | ||
template<typename TAcc> | ||
class KernelExecutionBenchmarkFixture | ||
{ | ||
public: | ||
using Acc = TAcc; | ||
using Dim = alpaka::Dim<Acc>; | ||
using Idx = alpaka::Idx<Acc>; | ||
using DevAcc = Dev<Acc>; | ||
using PltfAcc = Pltf<DevAcc>; | ||
using QueueAcc = test::DefaultQueue<DevAcc>; | ||
using WorkDiv = WorkDivMembers<Dim, Idx>; | ||
|
||
KernelExecutionBenchmarkFixture(WorkDiv workDiv) | ||
: m_devHost(getDevByIdx<PltfCpu>(0u)) | ||
, m_devAcc(getDevByIdx<PltfAcc>(0u)) | ||
, m_queue(m_devAcc) | ||
, m_workDiv(std::move(workDiv)) | ||
{ | ||
} | ||
|
||
template<typename TExtent> | ||
KernelExecutionBenchmarkFixture(TExtent const& extent) | ||
: KernelExecutionBenchmarkFixture(getValidWorkDiv<Acc>( | ||
getDevByIdx<PltfAcc>(0u), | ||
extent, | ||
Vec<Dim, Idx>::ones(), | ||
false, | ||
GridBlockExtentSubDivRestrictions::Unrestricted)) | ||
{ | ||
} | ||
|
||
template<typename TKernelFnObj, typename... TArgs> | ||
auto operator()( | ||
TKernelFnObj const& kernelFnObj, | ||
std::string const& benchmarkName, | ||
float& result, | ||
TArgs&&... args) -> bool | ||
{ | ||
// Allocate result buffers | ||
auto bufAccResult = allocBuf<float, Idx>(m_devAcc, static_cast<Idx>(1u)); | ||
auto bufHostResult = allocBuf<float, Idx>(m_devHost, static_cast<Idx>(1u)); | ||
|
||
int numRuns = 0; | ||
result = 0.0f; | ||
|
||
// The following block is executed unknown times during estimation phase, then once per benchmark sample | ||
BENCHMARK_ADVANCED(std::string(benchmarkName))(Catch::Benchmark::Chronometer meter) | ||
{ | ||
numRuns++; | ||
memset(m_queue, bufAccResult, 0); | ||
wait(m_queue); | ||
|
||
// Only the following part is measured as the benchmark part | ||
meter.measure( | ||
[&] | ||
{ | ||
exec<Acc>( | ||
m_queue, | ||
m_workDiv, | ||
kernelFnObj, | ||
getPtrNative(bufAccResult), | ||
std::forward<TArgs>(args)...); // run the measured kernel | ||
wait(m_queue); // wait for the kernel to actually run | ||
}); | ||
|
||
// Copy the result value to the host | ||
memcpy(m_queue, bufHostResult, bufAccResult); | ||
wait(m_queue); | ||
|
||
auto const resultLocal = *getPtrNative(bufHostResult); | ||
result += resultLocal; | ||
return resultLocal; // make sure the benchmark call is not optimized away | ||
}; | ||
result /= static_cast<float>(numRuns); | ||
|
||
return true; | ||
// TODO: Can we return the result here and read it from Catch2's REQUIRE or something similar? Or are the | ||
// returns limited to bools? | ||
// return result; | ||
} | ||
|
||
protected: | ||
DevCpu m_devHost; | ||
DevAcc m_devAcc; | ||
QueueAcc m_queue; | ||
WorkDiv m_workDiv; | ||
}; | ||
} // namespace alpaka::test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# | ||
# Copyright 2022 Jiri Vyskocil | ||
# | ||
# This file is part of alpaka. | ||
# | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
# | ||
|
||
cmake_minimum_required(VERSION 3.18) | ||
|
||
# Pull in the build rules from the rand/ subdirectory. | ||
add_subdirectory("rand/") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# | ||
# Copyright 2022 Jiri Vyskocil | ||
# | ||
# This file is part of alpaka. | ||
# | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
# | ||
|
||
# Name shared by the benchmark executable and the registered test. | ||
set(_TARGET_NAME "randBenchmark") | ||
|
||
# Collect the "cpp" sources below src/ into _FILES_SOURCE. | ||
append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE) | ||
|
||
alpaka_add_executable( | ||
${_TARGET_NAME} | ||
${_FILES_SOURCE}) | ||
target_link_libraries( | ||
${_TARGET_NAME} | ||
PRIVATE common) | ||
|
||
set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER "test/benchmark") | ||
# Enables Catch2's benchmarking macros for this target. | ||
target_compile_definitions(${_TARGET_NAME} PUBLIC CATCH_CONFIG_ENABLE_BENCHMARKING) | ||
|
||
if(alpaka_CI) | ||
# For non-benchmarking CI test runs: collect only a single sample to check that the benchmark works at all. | ||
add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_alpaka_TEST_OPTIONS} --benchmark-samples 1) | ||
# Real automated benchmark runs will need to collect more samples (the default 100 is fine). The CI will then | ||
# have to set another variable to indicate whether it is only testing or wants to do a full benchmark. | ||
else() | ||
# For a full benchmark run: collects the default 100 samples for good benchmark statistics. | ||
add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_alpaka_TEST_OPTIONS}) | ||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* Copyright 2022 Jiri Vyskocil | ||
* | ||
* This file is part of alpaka. | ||
* | ||
* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
*/ | ||
|
||
#include <alpaka/example/ExampleDefaultAcc.hpp> | ||
#include <alpaka/rand/Traits.hpp> | ||
#include <alpaka/test/KernelExecutionBenchmarkFixture.hpp> | ||
#include <alpaka/test/acc/TestAccs.hpp> | ||
|
||
#include <catch2/catch.hpp> | ||
|
||
class RandBenchmarkKernel | ||
{ | ||
public: | ||
ALPAKA_NO_HOST_ACC_WARNING | ||
template<typename TAcc, typename TIdx> | ||
ALPAKA_FN_ACC void operator()(TAcc const& acc, float* result, TIdx numPoints) const | ||
{ | ||
// Get the global linearized thread idx. | ||
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc); | ||
auto const globalThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc); | ||
|
||
auto const linearizedGlobalThreadIdx | ||
= static_cast<TIdx>(alpaka::mapIdx<1u>(globalThreadIdx, globalThreadExtent)[0]); | ||
|
||
// Setup generator engine and distribution. | ||
auto engine = alpaka::rand::engine::createDefault(acc, 42, linearizedGlobalThreadIdx); | ||
auto dist(alpaka::rand::distribution::createUniformReal<float>(acc)); | ||
|
||
float number = 0; | ||
for(TIdx i = linearizedGlobalThreadIdx; i < numPoints; i += static_cast<TIdx>(globalThreadExtent.prod())) | ||
{ | ||
number += dist(engine); | ||
} | ||
|
||
alpaka::atomicAdd( | ||
acc, | ||
result, | ||
number); // TODO: we're measuring the atomicAdd time too, this is not what we want | ||
} | ||
}; | ||
|
||
// TODO: This takes an enormous time to finish and is probably useless anyway: | ||
// TEMPLATE_LIST_TEST_CASE("defaultRandomGeneratorBenchmark", "[randBenchmark]", alpaka::test::TestAccs) | ||
// Running the benchmark on a single default accelerator instead | ||
// Benchmarks RandBenchmarkKernel on the default example accelerator for several numbers of generated points and
// prints the mean generated value for manual inspection.
TEST_CASE("defaultRandomGeneratorBenchmark", "[randBenchmark]")
{
    // using Acc = TestType;
    using Acc = alpaka::ExampleDefaultAcc<alpaka::DimInt<1>, std::size_t>;
    using Dim = alpaka::Dim<Acc>;
    using Idx = alpaka::Idx<Acc>;
    using Vec = alpaka::Vec<Dim, Idx>;
    using WorkDiv = alpaka::WorkDivMembers<Dim, Idx>;

    auto const devAcc = alpaka::getDevByIdx<Acc>(0u);

    // std::thread::hardware_concurrency() may return 0 when the value is not computable. Fall back to a single
    // thread so that neither the requested grid extent nor the thread element extent below becomes zero.
    auto const hardwareThreads = std::thread::hardware_concurrency();
    Idx const numThreads
        = (hardwareThreads == 0u) ? static_cast<Idx>(1u) : static_cast<Idx>(hardwareThreads); // TODO: GPU?
    std::cout << "Hardware threads: " << numThreads << std::endl;

#ifdef ALPAKA_CI // Reduced benchmark set for automated test runs.
    const unsigned numPoints = GENERATE(10u, 1'000'000u);
#else
    const unsigned numPoints = GENERATE(10u, 100'000u, 1'000'000u, 10'000'000u, 100'000'000u, 1'000'000'000u);
#endif

    // Request numThreads^2 grid elements with numThreads elements per thread.
    WorkDiv workdiv{alpaka::getValidWorkDiv<Acc>(
        devAcc,
        Vec::all(numThreads * numThreads),
        Vec::all(numThreads),
        false,
        alpaka::GridBlockExtentSubDivRestrictions::Unrestricted)};

    alpaka::test::KernelExecutionBenchmarkFixture<Acc> fixture(workdiv);

    RandBenchmarkKernel kernel;

    float result = 0.0f;

    REQUIRE(fixture(kernel, "Random sequence N=" + std::to_string(numPoints), result, numPoints));
    // TODO: Actually check the result
    std::cout << "\ntemp debug normalized result = " << result / static_cast<float>(numPoints)
              << " should probably converge to 0.5." << std::flush;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters