From 79667ff6a2fb1ee307236b684189bffe87a4e531 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 21 Jun 2023 16:15:37 +0200 Subject: [PATCH 1/9] Use clang-15 --- .github/workflows/check-compilers.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/check-compilers.yml b/.github/workflows/check-compilers.yml index af7cc22f23aa..ac38d39cc85e 100644 --- a/.github/workflows/check-compilers.yml +++ b/.github/workflows/check-compilers.yml @@ -13,17 +13,17 @@ jobs: continue-on-error: true strategy: matrix: - cxx: ['g++', 'clang++-13'] + cxx: ['g++', 'clang++-15'] cmake_build_type: ['Release', 'Debug'] device: ['cuda', 'host'] parallel: ['serial', 'mpi'] - exclude: + #exclude: # Debug cuda clang build fail for the unit test. # Exclude for now until we figure out what's going on. # https://github.com/lanl/parthenon/issues/630 - - cxx: clang++-13 - device: cuda - cmake_build_type: Debug + #- cxx: clang++-15 + # device: cuda + # cmake_build_type: Debug runs-on: ubuntu-latest container: image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent From ec3ab8b14b4e21f15cde642e25a9b5fcd61c124e Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 21 Jun 2023 16:24:29 +0200 Subject: [PATCH 2/9] Update Frontier machine file --- cmake/machinecfg/FrontierAndCrusher.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/machinecfg/FrontierAndCrusher.cmake b/cmake/machinecfg/FrontierAndCrusher.cmake index 3d0769dba965..b25d163e818d 100644 --- a/cmake/machinecfg/FrontierAndCrusher.cmake +++ b/cmake/machinecfg/FrontierAndCrusher.cmake @@ -21,15 +21,15 @@ message(STATUS "Loading machine configuration for OLCF's Frontier and Crusher.\n " $ module load PrgEnv-amd craype-accel-amd-gfx90a cmake hdf5 cray-python amd/5.4.0 cray-mpich/8.1.21\n" "and environment variables:\n" " $ export MPICH_GPU_SUPPORT_ENABLED=1\n\n" - "On Frontier, different modules are required (tested on 2023-03-14): \n" - " $ module load 
PrgEnv-cray craype-accel-amd-gfx90a cmake hdf5 cray-python rocm/5.3.0 cray-mpich/8.1.23\n" + "On Frontier, different modules are required (tested on 2023-06-21): \n" + " $ module load PrgEnv-cray craype-accel-amd-gfx90a cmake cray-hdf5-parallel cray-python amd-mixed/5.3.0 cray-mpich/8.1.23 cce/15.0.1\n" " $ export MPICH_GPU_SUPPORT_ENABLED=1\n\n" "NOTE: In order to run the test suite, the build directory should be on GPFS (or on\n" "Frontier, Lustre) work filesystem and not in your NFS user or project home (because\n" "they are read-only on compute nodes).\n\n") # common options -set(Kokkos_ARCH_ZEN2 ON CACHE BOOL "CPU architecture") +set(Kokkos_ARCH_ZEN3 ON CACHE BOOL "CPU architecture") set(PARTHENON_DISABLE_OPENMP ON CACHE BOOL "OpenMP support not yet tested in Parthenon.") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Default release build") set(MACHINE_VARIANT "hip-mpi" CACHE STRING "Default build for CUDA and MPI") @@ -63,7 +63,7 @@ if (${MACHINE_VARIANT} MATCHES "hip") set(MACHINE_CXX_FLAGS "${MACHINE_CXX_FLAGS} -I$ENV{MPICH_DIR}/include") set(CMAKE_EXE_LINKER_FLAGS "-L$ENV{MPICH_DIR}/lib -lmpi -L$ENV{CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa" CACHE STRING "Default flags for this config") elseif (${MACHINE_VARIANT} MATCHES "cray") - set(MACHINE_CXX_FLAGS "${MACHINE_CXX_FLAGS} -I$ENV{ROCM_PATH_DIR}/include") + set(MACHINE_CXX_FLAGS "${MACHINE_CXX_FLAGS} -I$ENV{ROCM_PATH}/include") set(CMAKE_EXE_LINKER_FLAGS "-L$ENV{ROCM_PATH}/lib -lamdhip64" CACHE STRING "Default flags for this config") endif() From 8a8b9bdf383cdcd37a2384fd138a3faed71e7e8a Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 21 Jun 2023 16:27:29 +0200 Subject: [PATCH 3/9] clang cuda debug builds still broken --- .github/workflows/check-compilers.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check-compilers.yml b/.github/workflows/check-compilers.yml index ac38d39cc85e..4a445e79347a 100644 --- a/.github/workflows/check-compilers.yml +++ 
b/.github/workflows/check-compilers.yml @@ -17,13 +17,13 @@ jobs: cmake_build_type: ['Release', 'Debug'] device: ['cuda', 'host'] parallel: ['serial', 'mpi'] - #exclude: + exclude: # Debug cuda clang build fail for the unit test. # Exclude for now until we figure out what's going on. # https://github.com/lanl/parthenon/issues/630 - #- cxx: clang++-15 - # device: cuda - # cmake_build_type: Debug + - cxx: clang++-15 + device: cuda + cmake_build_type: Debug runs-on: ubuntu-latest container: image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent From 01e8c7bf71a97462a97cbde8d3e6f2597e10f7ca Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 21 Jun 2023 17:22:30 +0200 Subject: [PATCH 4/9] Remove duplicated Parthenon OpenMP option --- CHANGELOG.md | 1 + CMakeLists.txt | 30 +++++------------------ benchmarks/burgers/README.md | 2 +- cmake/machinecfg/FrontierAndCrusher.cmake | 1 - cmake/machinecfg/Ookami.cmake | 1 - cmake/machinecfg/Spock.cmake | 1 - cmake/machinecfg/Summit.cmake | 1 - doc/sphinx/src/building.rst | 3 +-- 8 files changed, 9 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60b3588e501f..58b029b5ebef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - [[PR 868]](https://github.com/parthenon-hpc-lab/parthenon/pull/868) Add block-local face, edge, and nodal fields and allow for packing ### Changed (changing behavior/API/variables/...) +- [[PR 896]](https://github.com/parthenon-hpc-lab/parthenon/pull/896) Update Kokkos integration to support installed version. 
Use `serial` (flat MPI) host parallelization by default (instead of OpenMP) - [[PR 888]](https://github.com/parthenon-hpc-lab/parthenon/pull/888) Bump Kokkos submodule to 4.0.1 - [[PR 885]](https://github.com/parthenon-hpc-lab/parthenon/pull/885) Expose PackDescriptor and use uids in SparsePacks diff --git a/CMakeLists.txt b/CMakeLists.txt index 2769c803e89d..5d563250c459 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,6 @@ include(CTest) option(PARTHENON_SINGLE_PRECISION "Run in single precision" OFF) option(PARTHENON_DISABLE_MPI "MPI is enabled by default if found, set this to True to disable MPI" OFF) option(PARTHENON_ENABLE_HOST_COMM_BUFFERS "CUDA/HIP Only: Allocate communication buffers on host (may be slower)" OFF) -option(PARTHENON_DISABLE_OPENMP "OpenMP is enabled by default if found, set this to True to disable OpenMP" OFF) option(PARTHENON_DISABLE_HDF5 "HDF5 is enabled by default if found, set this to True to disable HDF5" OFF) option(PARTHENON_DISABLE_HDF5_COMPRESSION "HDF5 compression is enabled by default, set this to True to disable compression in HDF5 output/restart files" OFF) option(PARTHENON_DISABLE_SPARSE "Sparse capability is enabled by default, set this to True to compile-time disable all sparse capability" OFF) @@ -59,13 +58,6 @@ option(TEST_INTEL_OPTIMIZATION "Test intel optimization and vectorization" OFF) option(TEST_ERROR_CHECKING "Enables the error checking unit test. This test will FAIL" OFF) option(CODE_COVERAGE "Enable code coverage reporting" OFF) -# Default to an external Kokkos package if the submodule is not populated -if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/external/Kokkos/CMakeLists.txt" AND NOT EXISTS "${Kokkos_ROOT}/CMakeLists.txt") - option(PARTHENON_IMPORT_KOKKOS "If ON, attempt to link to an external Kokkos library. If OFF, build Kokkos from source and package with Parthenon" ON) -else() - option(PARTHENON_IMPORT_KOKKOS "If ON, attempt to link to an external Kokkos library. 
If OFF, build Kokkos from source and package with Parthenon" OFF) -endif() - include(cmake/Format.cmake) include(cmake/Lint.cmake) @@ -135,22 +127,12 @@ if (NOT PARTHENON_DISABLE_MPI) set(ENABLE_MPI ON) endif() -set(ENABLE_OPENMP OFF) -if (NOT PARTHENON_DISABLE_OPENMP) - # Using Host OpenMP and Cuda with nvcc currently does not compile with C++17 standard. - # Also there is no proper support for two separate execution spaces in Parthenon right now. - # We may need to revisit this logic when OpenMP target for devices will be used in the future. - if (Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_HIP) - message(STATUS "Parthenon is not using Host OpenMP because Cuda or HIP is enabled") - else() - find_package(OpenMP COMPONENTS CXX) - if (NOT OpenMP_FOUND) - message(FATAL_ERROR "OpenMP is required but couldn't be found. " - "If you want to build Parthenon without OpenMP, please rerun CMake with -DPARTHENON_DISABLE_OPENMP=ON") - endif() - set(ENABLE_OPENMP ON) - set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Allow Kokkos to use OpenMP as execution space.") - endif() + +# Default to an external Kokkos package if the submodule is not populated or an external source directory provided +if(PARTHENON_IMPORT_KOKKOS OR (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/external/Kokkos/CMakeLists.txt" AND NOT EXISTS "${Kokkos_ROOT}/CMakeLists.txt")) + option(PARTHENON_IMPORT_KOKKOS "If ON, attempt to link to an external Kokkos library. If OFF, build Kokkos from source and package with Parthenon" ON) +else() + option(PARTHENON_IMPORT_KOKKOS "If ON, attempt to link to an external Kokkos library. 
If OFF, build Kokkos from source and package with Parthenon" OFF) endif() set(ENABLE_SPARSE ON) diff --git a/benchmarks/burgers/README.md b/benchmarks/burgers/README.md index 9995ca7ca837..9b4acfb85243 100644 --- a/benchmarks/burgers/README.md +++ b/benchmarks/burgers/README.md @@ -58,7 +58,7 @@ To build Parthenon on CPU, including this benchmark, with minimal external depen ```bash parthenon$ mkdir build && cd build -parthenon$ cmake -DPARTHENON_DISABLE_HDF5=ON -DPARTHENON_DISABLE_OPENMP=ON -DPARTHENON_ENABLE_PYTHON_MODULE_CHECK=OFF ../ +parthenon$ cmake -DPARTHENON_DISABLE_HDF5=ON -DPARTHENON_ENABLE_PYTHON_MODULE_CHECK=OFF ../ parthenon$ make -j ``` The executable `burgers-benchmark` should be built in `parthenon/build/benchmarks/burgers/` and can be run as, e.g. diff --git a/cmake/machinecfg/FrontierAndCrusher.cmake b/cmake/machinecfg/FrontierAndCrusher.cmake index b25d163e818d..ae318c93232c 100644 --- a/cmake/machinecfg/FrontierAndCrusher.cmake +++ b/cmake/machinecfg/FrontierAndCrusher.cmake @@ -30,7 +30,6 @@ message(STATUS "Loading machine configuration for OLCF's Frontier and Crusher.\n # common options set(Kokkos_ARCH_ZEN3 ON CACHE BOOL "CPU architecture") -set(PARTHENON_DISABLE_OPENMP ON CACHE BOOL "OpenMP support not yet tested in Parthenon.") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Default release build") set(MACHINE_VARIANT "hip-mpi" CACHE STRING "Default build for CUDA and MPI") diff --git a/cmake/machinecfg/Ookami.cmake b/cmake/machinecfg/Ookami.cmake index b6a5e0862c07..e00b5b8ff03a 100644 --- a/cmake/machinecfg/Ookami.cmake +++ b/cmake/machinecfg/Ookami.cmake @@ -23,7 +23,6 @@ message(STATUS "Loading machine configuration for Stony Brook's A64FX Ookami.\n" set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Default release build") set(Kokkos_ARCH_A64FX ON CACHE BOOL "CPU architecture") -set(PARTHENON_DISABLE_OPENMP ON CACHE BOOL "OpenMP support not yet tested in Parthenon.") set(CMAKE_CXX_COMPILER "mpiFCC" CACHE STRING "Default compiler") 
set(CMAKE_CXX_FLAGS "-Nclang -ffj-fast-matmul -ffast-math -ffp-contract=fast -ffj-fp-relaxed -ffj-ilfunc -fbuiltin -fomit-frame-pointer -finline-functions -ffj-preex -ffj-zfill -ffj-swp -fopenmp-simd" CACHE STRING "Default opt flags") diff --git a/cmake/machinecfg/Spock.cmake b/cmake/machinecfg/Spock.cmake index 42018f8ba1a2..89cdbeb6a5f1 100644 --- a/cmake/machinecfg/Spock.cmake +++ b/cmake/machinecfg/Spock.cmake @@ -26,7 +26,6 @@ message(STATUS "Loading machine configuration for OLCF's Spock.\n" # common options set(Kokkos_ARCH_ZEN2 ON CACHE BOOL "CPU architecture") -set(PARTHENON_DISABLE_OPENMP ON CACHE BOOL "OpenMP support not yet tested in Parthenon.") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Default release build") set(MACHINE_VARIANT "hip-mpi" CACHE STRING "Default build for CUDA and MPI") diff --git a/cmake/machinecfg/Summit.cmake b/cmake/machinecfg/Summit.cmake index 18b5ec87dd0e..c30fdc21ed58 100644 --- a/cmake/machinecfg/Summit.cmake +++ b/cmake/machinecfg/Summit.cmake @@ -23,7 +23,6 @@ message(STATUS "Loading machine configuration for OLCF's Summit.\n" # common options set(Kokkos_ARCH_POWER9 ON CACHE BOOL "CPU architecture") -set(PARTHENON_DISABLE_OPENMP ON CACHE BOOL "OpenMP support not yet tested in Parthenon.") set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Default release build") set(MACHINE_VARIANT "cuda-mpi" CACHE STRING "Default build for CUDA and MPI") diff --git a/doc/sphinx/src/building.rst b/doc/sphinx/src/building.rst index fdf3ab4b210b..057d5b2342b5 100644 --- a/doc/sphinx/src/building.rst +++ b/doc/sphinx/src/building.rst @@ -22,7 +22,6 @@ General list of cmake options: || PARTHENON\_DISABLE\_MPI || OFF || Option || MPI is enabled by default if found, set this to True to disable MPI | || PARTHENON\_ENABLE\_HOST\_COMM\_BUFFERS || OFF || Option || MPI communication buffers are by default allocated on the execution device. This options forces allocation in memory accessible directly by the host. 
| || PARTHENON\_DISABLE\_SPARSE || OFF || Option || Disable sparse allocation of sparse variables, i.e., sparse variable still work but are always allocated. See also :ref:`sparse doc `. | -|| PARTHENON\_DISABLE\_OPENMP || OFF || Option || OpenMP is enabled by default if found, set this to True to disable OpenMP | || ENABLE\_COMPILER\_WARNINGS || OFF || Option || Enable compiler warnings | || TEST\_ERROR\_CHECKING || OFF || Option || Enables the error checking unit test. This test will FAIL | || TEST\_INTEL\_OPTIMIZATION || OFF || Option || Test intel optimization and vectorization | @@ -579,7 +578,7 @@ Cuda with MPI # configure and build. Make sure to build in an directory on the GPFS filesystem if you want to run the regression tests because the home directory is not writeable from the compute nodes (which will result in the regression tests failing) $ mkdir build-cuda-mpi && cd build-cuda-mpi # note that we do not specify the mpicxx wrapper in the following as cmake automatically extracts the required include and linker options - $ cmake -DPARTHENON_DISABLE_HDF5=On -DCMAKE_BUILD_TYPE=Release -DKokkos_ENABLE_OPENMP=True -DKokkos_ARCH_POWER9=True -DKokkos_ENABLE_CUDA=True -DKokkos_ARCH_VOLTA70=True -DCMAKE_CXX_COMPILER=${PWD}/../external/Kokkos/bin/nvcc_wrapper .. + $ cmake -DPARTHENON_DISABLE_HDF5=On -DCMAKE_BUILD_TYPE=Release -DKokkos_ENABLE_OPENMP=ON -DKokkos_ARCH_POWER9=True -DKokkos_ENABLE_CUDA=True -DKokkos_ARCH_VOLTA70=True -DCMAKE_CXX_COMPILER=${PWD}/../external/Kokkos/bin/nvcc_wrapper .. 
$ make -j # The following commands are exepected to be run within job (interactive or scheduled) From 26eb771a700295718dd300d285e0f3edc81bcf11 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 21 Jun 2023 18:31:14 +0200 Subject: [PATCH 5/9] Update Kokkos submodule config and import logic --- CMakeLists.txt | 121 ++++++++++++++++++++---------------- doc/sphinx/src/building.rst | 2 +- src/CMakeLists.txt | 11 ---- 3 files changed, 67 insertions(+), 67 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d563250c459..f07689e6034f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,26 +127,11 @@ if (NOT PARTHENON_DISABLE_MPI) set(ENABLE_MPI ON) endif() - -# Default to an external Kokkos package if the submodule is not populated or an external source directory provided -if(PARTHENON_IMPORT_KOKKOS OR (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/external/Kokkos/CMakeLists.txt" AND NOT EXISTS "${Kokkos_ROOT}/CMakeLists.txt")) - option(PARTHENON_IMPORT_KOKKOS "If ON, attempt to link to an external Kokkos library. If OFF, build Kokkos from source and package with Parthenon" ON) -else() - option(PARTHENON_IMPORT_KOKKOS "If ON, attempt to link to an external Kokkos library. If OFF, build Kokkos from source and package with Parthenon" OFF) -endif() - set(ENABLE_SPARSE ON) if (PARTHENON_DISABLE_SPARSE) set(ENABLE_SPARSE OFF) endif() -if (Kokkos_ENABLE_CUDA AND TEST_INTEL_OPTIMIZATION) - message(WARNING - "Intel optimizer flags may not be passed through NVCC wrapper correctly. 
" - "If you encounter problems, please delete your CMake cache " - "and rerun CMake with -DTEST_INTEL_OPTIMIZATION=OFF.") -endif() - set(ENABLE_HDF5 OFF) if (NOT PARTHENON_DISABLE_HDF5) set(HDF5_PREFER_PARALLEL ${ENABLE_MPI}) @@ -202,53 +187,55 @@ endif() # Kokkos recommendatation resulting in not using default GNU extensions set(CMAKE_CXX_EXTENSIONS OFF) -# Tell Kokkos to vectorize aggressively -# Kokkos prefers this capitalization for debugging reasons -SET (Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION ON CACHE BOOL - "Kokkos aggressive vectorization") - # Check that gpu devices are actually detected set(NUM_GPU_DEVICES_PER_NODE "1" CACHE STRING "Number of gpu devices to use when testing if built with Kokkos_ENABLE_CUDA") set(NUM_OMP_THREADS_PER_RANK "1" CACHE STRING "Number of threads to use when testing if built with Kokkos_ENABLE_OPENMP") -if (Kokkos_ENABLE_CUDA) - # Tell Kokkos we need lambdas in Cuda. - SET (Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL - "Enable lambda expressions in CUDA") - if ( "${PARTHENON_ENABLE_GPU_MPI_CHECKS}" ) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake @ONLY) - endif() -endif() - -# If this is a debug build, set kokkos debug on -if (${CMAKE_BUILD_TYPE} STREQUAL "Debug") - message(STATUS "Enabling Kokkos debug mode") - set(Kokkos_ENABLE_DEBUG ON CACHE BOOL "Most general debug settings") - set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK ON CACHE BOOL - "Bounds checking on Kokkos views") - set(Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK ON CACHE BOOL - "Sanity checks on Kokkos DualView") -endif() if (ENABLE_COMPILER_WARNINGS) - message(STATUS "Enabling -Wall and setting Kokkos_ENABLE_COMPILER_WARNINGS=True") - set(Kokkos_ENABLE_COMPILER_WARNINGS True CACHE BOOL - "Make the compiler warn us about things") + message(STATUS "Enabling -Wall.") add_compile_options(-Wall) endif() -set(Kokkos_ENABLE_DEPRECATED_CODE_3 OFF CACHE BOOL "No need for old/unused code.") - set(CMAKE_CXX_STANDARD_REQUIRED ON) 
set(CMAKE_CXX_STANDARD 17) +option(PARTHENON_IMPORT_KOKKOS "If ON, attempt to link to an external Kokkos library. If OFF, build Kokkos from source and package with Parthenon" OFF) if (NOT TARGET Kokkos::kokkos) if (PARTHENON_IMPORT_KOKKOS) find_package(Kokkos 4) if (NOT Kokkos_FOUND) - unset(PARTHENON_IMPORT_KOKKOS CACHE) message(FATAL_ERROR "Could not find external Kokkos. Consider importing a Kokkos installation into your environment or disabling external Kokkos with e.g. -DPARTHENON_IMPORT_KOKKOS=OFF") + else() + message(STATUS "Using imported Kokkos from ${Kokkos_CONFIG}. " + "All manually defined Kokkos variables will be ignored as they have no effect on the imported " + "(installed) version. If you want to use a custom Kokkos version, please use the one shipped " + "as Parthenon submodule.") endif() else() + # First, set some Kokkos options + # Tell Kokkos to vectorize aggressively + # Kokkos prefers this capitalization for debugging reasons + SET (Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION ON CACHE BOOL + "Kokkos aggressive vectorization") + + # If this is a debug build, set kokkos debug on + if (${CMAKE_BUILD_TYPE} STREQUAL "Debug") + message(STATUS "Enabling Kokkos debug mode") + set(Kokkos_ENABLE_DEBUG ON CACHE BOOL "Most general debug settings") + set(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK ON CACHE BOOL + "Bounds checking on Kokkos views") + set(Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK ON CACHE BOOL + "Sanity checks on Kokkos DualView") + endif() + + # Also enable Compiler warnings for Kokkos + if (ENABLE_COMPILER_WARNINGS) + message(STATUS "Setting Kokkos_ENABLE_COMPILER_WARNINGS=True") + set(Kokkos_ENABLE_COMPILER_WARNINGS True CACHE BOOL + "Make the compiler warn us about things") + endif() + + # Second, add Kokkos if (EXISTS ${Kokkos_ROOT}/CMakeLists.txt) add_subdirectory(${Kokkos_ROOT} Kokkos) message(STATUS "Using Kokkos source from Kokkos_ROOT=${Kokkos_ROOT}") @@ -260,7 +247,16 @@ if (NOT TARGET Kokkos::kokkos) endif() endif() endif() 
+get_cmake_property(_variableNames VARIABLES) +list (SORT _variableNames) +foreach (_variableName ${_variableNames}) + message(STATUS "${_variableName}=${${_variableName}}") +endforeach() + +message(STATUS "CUDA: ${Kokkos_ENABLE_CUDA}") +# After we have imported Kokkos we can now report/check our config as Kokkos_ENABLE_XXX +# is also available when imported. if (Kokkos_ENABLE_SYCL) message(WARNING "SYCL backend is currently NOT tested in Parthenon due to lack of access to hardware. " @@ -275,6 +271,32 @@ if (PARTHENON_ENABLE_HOST_COMM_BUFFERS) endif() endif() +if (Kokkos_ENABLE_CUDA AND TEST_INTEL_OPTIMIZATION) + message(WARNING + "Intel optimizer flags may not be passed through NVCC wrapper correctly. " + "If you encounter problems, please delete your CMake cache " + "and rerun CMake with -DTEST_INTEL_OPTIMIZATION=OFF.") +endif() + +# Globally turn on useful intel and/or nvcc compiler output +if (Kokkos_ENABLE_CUDA) + if(CHECK_REGISTRY_PRESSURE) + add_compile_options(-Xptxas=-v) + endif() +endif() +# Note that these options may not play nice with nvcc wrapper +if (TEST_INTEL_OPTIMIZATION) + add_compile_options(-fp-model fast=2 -qopt_report5 -vec-threshold0 -qopt_report_phase=vec) +endif() + +# GPU checks on the build node are currently only supported for Nvidia GPUs +if (Kokkos_ENABLE_CUDA AND "${PARTHENON_ENABLE_GPU_MPI_CHECKS}" ) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/CTestCustom.cmake.in ${CMAKE_BINARY_DIR}/CTestCustom.cmake @ONLY) +endif() + + + + # Build Tests and download Catch2 if (PARTHENON_ENABLE_UNIT_TESTS OR PARTHENON_ENABLE_INTEGRATION_TESTS OR PARTHENON_ENABLE_REGRESSION_TESTS OR PARTHENON_ENABLE_PERFORMANCE_TESTS) @@ -334,17 +356,6 @@ if (PARTHENON_ENABLE_UNIT_TESTS OR PARTHENON_ENABLE_INTEGRATION_TESTS OR PARTHEN add_subdirectory(tst) endif() -# Globally turn on useful intel and/or nvcc compiler output -if (Kokkos_ENABLE_CUDA) - if(CHECK_REGISTRY_PRESSURE) - add_compile_options(-Xptxas=-v) - endif() -endif() -# Note that these options 
may not play nice with nvcc wrapper -if (TEST_INTEL_OPTIMIZATION) - add_compile_options(-fp-model fast=2 -qopt_report5 -vec-threshold0 -qopt_report_phase=vec) -endif() - if (PARTHENON_ENABLE_ASCENT) find_package(Ascent REQUIRED NO_DEFAULT_PATH) endif() diff --git a/doc/sphinx/src/building.rst b/doc/sphinx/src/building.rst index 057d5b2342b5..0965f2756cd2 100644 --- a/doc/sphinx/src/building.rst +++ b/doc/sphinx/src/building.rst @@ -172,7 +172,7 @@ The below example ``CMakeLists.txt`` can be used to compile the cmake_minimum_required(VERSION 3.11) project(parthenon_linking_example) - set(Kokkos_CXX_STANDARD "c++14") + set(CMAKE_CXX_STANDARD "17") set(CMAKE_CXX_EXTENSIONS OFF) find_package(parthenon REQUIRED PATHS "/path/to/parthenon/install") add_executable( diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 90e4240538ea..4721b45fc590 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -287,17 +287,6 @@ if (ENABLE_HDF5) target_link_libraries(parthenon PUBLIC HDF5_C) endif() -# For Cuda with NVCC (<11.2) and C++17 Kokkos currently does not work/compile with -# relaxed-constexpr, see https://github.com/kokkos/kokkos/issues/3496 -# However, Parthenon heavily relies on it and there is no harm in compiling Kokkos -# without and Parthenon with (via Max Katz on the Kokkos Slack channel). -# Therefore, we don't use the Kokkos_ENABLE_CUDA_CONSTEXPR option add the flag manually. -# Also, not checking for NVIDIA as nvcc_wrapper is identified as GNU so we just make sure -# the flag is not added when compiling with Clang for Cuda. -if (Kokkos_ENABLE_CUDA AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - target_compile_options(parthenon PUBLIC --expt-relaxed-constexpr) -endif() - target_link_libraries(parthenon PUBLIC Kokkos::kokkos) if (PARTHENON_ENABLE_ASCENT) From ed5390cc1f375e15978a6d8bbaa451771ac753fd Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 21 Jun 2023 19:24:50 +0200 Subject: [PATCH 6/9] Silly me... 
we need relaxed constexpr --- CMakeLists.txt | 5 ----- src/CMakeLists.txt | 11 +++++++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f07689e6034f..b22a7d151585 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,11 +247,6 @@ if (NOT TARGET Kokkos::kokkos) endif() endif() endif() -get_cmake_property(_variableNames VARIABLES) -list (SORT _variableNames) -foreach (_variableName ${_variableNames}) - message(STATUS "${_variableName}=${${_variableName}}") -endforeach() message(STATUS "CUDA: ${Kokkos_ENABLE_CUDA}") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4721b45fc590..90e4240538ea 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -287,6 +287,17 @@ if (ENABLE_HDF5) target_link_libraries(parthenon PUBLIC HDF5_C) endif() +# For Cuda with NVCC (<11.2) and C++17 Kokkos currently does not work/compile with +# relaxed-constexpr, see https://github.com/kokkos/kokkos/issues/3496 +# However, Parthenon heavily relies on it and there is no harm in compiling Kokkos +# without and Parthenon with (via Max Katz on the Kokkos Slack channel). +# Therefore, we don't use the Kokkos_ENABLE_CUDA_CONSTEXPR option add the flag manually. +# Also, not checking for NVIDIA as nvcc_wrapper is identified as GNU so we just make sure +# the flag is not added when compiling with Clang for Cuda. 
+if (Kokkos_ENABLE_CUDA AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + target_compile_options(parthenon PUBLIC --expt-relaxed-constexpr) +endif() + target_link_libraries(parthenon PUBLIC Kokkos::kokkos) if (PARTHENON_ENABLE_ASCENT) From b0a41360823d767403ad8e035280f9bf69b9f368 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 21 Jun 2023 20:30:59 +0200 Subject: [PATCH 7/9] Fix some unused vars --- example/particle_tracers/particle_tracers.cpp | 1 - src/bvals/comms/boundary_communication.cpp | 4 ++-- tst/unit/test_unit_integrators.cpp | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/example/particle_tracers/particle_tracers.cpp b/example/particle_tracers/particle_tracers.cpp index 5887584cdc75..005bb6651848 100644 --- a/example/particle_tracers/particle_tracers.cpp +++ b/example/particle_tracers/particle_tracers.cpp @@ -374,7 +374,6 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { int num_tracers_meshblock = std::round(num_tracers * number_meshblock / number_mesh); int gid = pmb->gid; - int nbtotal = pmb->pmy_mesh->nbtotal; ParArrayND new_indices; swarm->AddEmptyParticles(num_tracers_meshblock, new_indices); diff --git a/src/bvals/comms/boundary_communication.cpp b/src/bvals/comms/boundary_communication.cpp index 1ad91493d91d..698db6f0c0f3 100644 --- a/src/bvals/comms/boundary_communication.cpp +++ b/src/bvals/comms/boundary_communication.cpp @@ -314,8 +314,8 @@ TaskStatus ApplyCoarseBoundaryConditions(std::shared_ptr> &md) { TaskID AddBoundaryExchangeTasks(TaskID dependency, TaskList &tl, std::shared_ptr> &md, bool multilevel) { const auto any = BoundaryType::any; - const auto local = BoundaryType::local; - const auto nonlocal = BoundaryType::nonlocal; + // const auto local = BoundaryType::local; + // const auto nonlocal = BoundaryType::nonlocal; // auto send = tl.AddTask(dependency, SendBoundBufs, md); // auto send_local = tl.AddTask(dependency, SendBoundBufs, md); diff --git a/tst/unit/test_unit_integrators.cpp 
b/tst/unit/test_unit_integrators.cpp index e6ab151e05c2..834526d45503 100644 --- a/tst/unit/test_unit_integrators.cpp +++ b/tst/unit/test_unit_integrators.cpp @@ -142,7 +142,7 @@ void Integrate(const Integrator &integrator, const Stepper &step, const Real tf, TEST_CASE("Low storage integrator", "[StagedIntegrator]") { GIVEN("A state with an initial condition") { - Real t0 = 0, tf = 1.15; // delibarately not a nice fraction of a period + Real tf = 1.15; // delibarately not a nice fraction of a period State_t ufinal; GetTrueSolution(tf, ufinal); WHEN("We integrate with LowStorage rk1") { From a9ea17a789e235a366e91c8664f7b292b43284f3 Mon Sep 17 00:00:00 2001 From: Jonah Maxwell Miller Date: Tue, 20 Jun 2023 16:52:34 -0600 Subject: [PATCH 8/9] map instead of unordered_map --- src/interface/sparse_pool.hpp | 8 +++++--- tst/unit/test_state_descriptor.cpp | 8 ++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/interface/sparse_pool.hpp b/src/interface/sparse_pool.hpp index f457626b11cb..56a7d5d7d1eb 100644 --- a/src/interface/sparse_pool.hpp +++ b/src/interface/sparse_pool.hpp @@ -13,9 +13,9 @@ #ifndef INTERFACE_SPARSE_POOL_HPP_ #define INTERFACE_SPARSE_POOL_HPP_ +#include #include #include -#include #include #include "metadata.hpp" @@ -74,7 +74,7 @@ class SparsePool { const std::string &base_name() const { return base_name_; } const std::string &controller_base_name() const { return controller_base_name_; } const Metadata &shared_metadata() const { return shared_metadata_; } - const std::unordered_map &pool() const { return pool_; } + const std::map &pool() const { return pool_; } auto size() const { return pool_.size(); } // Add a new sparse ID to the pool with optional arguments: @@ -117,7 +117,9 @@ class SparsePool { Metadata shared_metadata_; // Metadata per sparse id - std::unordered_map pool_; + // JMM: note that this map SHOULD be ordered as sparse ids, being + // integers, have an implicit ordering. 
+ std::map pool_; }; } // namespace parthenon diff --git a/tst/unit/test_state_descriptor.cpp b/tst/unit/test_state_descriptor.cpp index 3b058372174c..ef69f182e514 100644 --- a/tst/unit/test_state_descriptor.cpp +++ b/tst/unit/test_state_descriptor.cpp @@ -219,6 +219,14 @@ TEST_CASE("Test dependency resolution in StateDescriptor", "[StateDescriptor]") REQUIRE(pkg4->FieldMetadata("sparse", sparse_ids[i]) == (m_sparse_provides)); } } + AND_THEN("The sparse ids in the sparse pool are sorted") { + auto &pool = (pkg4->GetSparsePool("sparse")).pool(); + std::vector local_ids; + for (auto &[id, m] : pool) { + local_ids.push_back(id); + } + REQUIRE(std::is_sorted(local_ids.begin(), local_ids.end())); + } } } From a7e4b21174232ba209b3a772fbb13208e122fb06 Mon Sep 17 00:00:00 2001 From: Jonah Maxwell Miller Date: Tue, 20 Jun 2023 16:57:42 -0600 Subject: [PATCH 9/9] PR checklist --- CHANGELOG.md | 1 + src/interface/sparse_pool.hpp | 2 +- tst/unit/test_state_descriptor.cpp | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f8c0e300b67..55d6660fa8bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ ### Changed (changing behavior/API/variables/...) - [[PR 896]](https://github.com/parthenon-hpc-lab/parthenon/pull/896) Update Kokkos integration to support installed version. Use `serial` (flat MPI) host parallelization by default (instead of OpenMP) - [[PR 888]](https://github.com/parthenon-hpc-lab/parthenon/pull/888) Bump Kokkos submodule to 4.0.1 +- [[PR 894]](https://github.com/parthenon-hpc-lab/parthenon/pull/894) Demand that sparse pool order sparse ids - [[PR 885]](https://github.com/parthenon-hpc-lab/parthenon/pull/885) Expose PackDescriptor and use uids in SparsePacks ### Fixed (not changing behavior/API/variables/...) 
diff --git a/src/interface/sparse_pool.hpp b/src/interface/sparse_pool.hpp index 56a7d5d7d1eb..ec21b1e390a0 100644 --- a/src/interface/sparse_pool.hpp +++ b/src/interface/sparse_pool.hpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC diff --git a/tst/unit/test_state_descriptor.cpp b/tst/unit/test_state_descriptor.cpp index ef69f182e514..8f290836c0fa 100644 --- a/tst/unit/test_state_descriptor.cpp +++ b/tst/unit/test_state_descriptor.cpp @@ -3,7 +3,7 @@ // Copyright(C) 2014 James M. Stone and other code contributors // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC