From bc98df05e2c1a31380a22d4d5cd39976cc75471e Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 10 Jul 2024 11:56:43 +0200 Subject: [PATCH 1/4] Test cuda integration on hamilton --- .github/workflows/ci-short.yml | 39 ++++++++++++++++++++++++++++ cmake/machinecfg/GitHubActions.cmake | 6 ++--- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml index ecb4052411ee..936999c0e7cb 100644 --- a/.github/workflows/ci-short.yml +++ b/.github/workflows/ci-short.yml @@ -173,3 +173,42 @@ jobs: build/CMakeFiles/CMakeOutput.log retention-days: 3 + integration-nvidia-hamilton: + runs-on: [self-hosted, navi1030] + container: + image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent + # Map to local user id on CI machine to allow writing to build cache + options: --user 1000 --runtime=nvidia + env: + CMAKE_GENERATOR: Ninja + CMAKE_BUILD_PARALLEL_LEVEL: 8 # num threads for build + steps: + - uses: actions/checkout@v3 + with: + submodules: 'true' + - name: Configure + run: | + cmake -B build \ + -DMACHINE_CFG=${PWD}/cmake/machinecfg/GitHubActions.cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DMACHINE_VARIANT=cuda-mpi \ + -DCMAKE_CXX_COMPILER=${PWD}/external/Kokkos/bin/nvcc_wrapper + # Test example with "variables" and output + - name: advection + run: | + cmake --build build -t advection-example + cd build + ctest -R regression_mpi_test:output_hdf5 + # Test example with swarms + - name: particle-leapfrog + run: | + cmake --build build -t particle-leapfrog + cd build + ctest -R regression_mpi_test:particle_leapfrog + + - uses: actions/upload-artifact@v3 + with: + name: configure-log-integration-nvidia + path: | + build/CMakeFiles/CMakeOutput.log + retention-days: 3 diff --git a/cmake/machinecfg/GitHubActions.cmake b/cmake/machinecfg/GitHubActions.cmake index 663dcb38d682..adb3b018aa6a 100644 --- a/cmake/machinecfg/GitHubActions.cmake +++ b/cmake/machinecfg/GitHubActions.cmake @@ -19,11 +19,12 @@ 
message(STATUS "Loading machine configuration for GitHub Actions CI. ") # common options set(NUM_MPI_PROC_TESTING "2" CACHE STRING "CI runs tests with 2 MPI ranks") +set(Kokkos_ENABLE_ZEN3 ON CACHE BOOL "Enable Zen3") set(MACHINE_CXX_FLAGS "") if (${MACHINE_VARIANT} MATCHES "cuda") - # using an arbitrary arch as GitHub Action runners don't have GPUs - set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "GPU architecture") + # using an arch that matches Hamilton at Hamburg Obs + set(Kokkos_ARCH_AMPERE86 ON CACHE BOOL "GPU architecture") set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Enable Cuda") if (${CMAKE_CXX_COMPILER} MATCHES "clang") set(MACHINE_CXX_FLAGS "${MACHINE_CXX_FLAGS} -Wno-unknown-cuda-version") @@ -32,7 +33,6 @@ elseif (${MACHINE_VARIANT} MATCHES "hip") # using an arch that matches Hamilton at Hamburg Obs set(Kokkos_ARCH_NAVI1030 ON CACHE BOOL "GPU architecture") set(Kokkos_ENABLE_HIP ON CACHE BOOL "Enable HIP") - set(Kokkos_ENABLE_ZEN3 ON CACHE BOOL "Enable Zen3") else() set(MACHINE_CXX_FLAGS "${MACHINE_CXX_FLAGS} -fopenmp-simd") endif() From eb588f80d25418d830257df768c488b05e56f77a Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 10 Jul 2024 12:11:32 +0200 Subject: [PATCH 2/4] Fix indent and bump actions version --- .github/workflows/ci-short.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml index 936999c0e7cb..6e4f778a6d7f 100644 --- a/.github/workflows/ci-short.yml +++ b/.github/workflows/ci-short.yml @@ -24,7 +24,7 @@ jobs: # map to local user id on CI machine to allow writing to build cache options: --user 1001 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: 'true' - name: cpplint @@ -33,7 +33,7 @@ jobs: run: | cmake -DCMAKE_CXX_FLAGS=-Werror -Bbuild-copyright-check cmake --build build-copyright-check -t check-copyright - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: 
configure-log-style path: build-copyright-check/CMakeFiles/CMakeOutput.log @@ -49,7 +49,7 @@ jobs: # map to local user id on CI machine to allow writing to build cache options: --user 1001 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: 'true' - name: Configure @@ -65,7 +65,7 @@ jobs: # Pick GPU with most available memory export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=memory.free,index --format=csv,nounits,noheader | sort -nr | head -1 | awk '{ print $NF }') ctest -LE 'performance|regression' - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: configure-log-unit-${{ matrix.device }} path: build/CMakeFiles/CMakeOutput.log @@ -81,7 +81,7 @@ jobs: # map to local user id on CI machine to allow writing to build cache options: --user 1001 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: 'true' - name: Configure @@ -124,7 +124,7 @@ jobs: if grep HtoD profile.txt; then exit 1; fi if grep DtoH profile.txt; then exit 1; fi - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: configure-log-integration-${{ matrix.device }} path: | @@ -143,7 +143,7 @@ jobs: CMAKE_GENERATOR: Ninja CMAKE_BUILD_PARALLEL_LEVEL: 8 # num threads for build steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: 'true' - name: Configure @@ -166,14 +166,14 @@ jobs: cd build ctest -R regression_mpi_test:particle_leapfrog - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: configure-log-integration-amdgpu path: | build/CMakeFiles/CMakeOutput.log retention-days: 3 - integration-nvidia-hamilton: + integration-nvidia-hamilton: runs-on: [self-hosted, navi1030] container: image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent @@ -183,7 +183,7 @@ jobs: CMAKE_GENERATOR: Ninja CMAKE_BUILD_PARALLEL_LEVEL: 8 # num threads for build steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: 
submodules: 'true' - name: Configure @@ -206,7 +206,7 @@ jobs: cd build ctest -R regression_mpi_test:particle_leapfrog - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: configure-log-integration-nvidia path: | From e89c9960768a0b95cd3888ede4e27a286d0f9f52 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 10 Jul 2024 12:29:20 +0200 Subject: [PATCH 3/4] Debug cuda hamilton --- .github/workflows/ci-short.yml | 1 + cmake/machinecfg/GitHubActions.cmake | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml index 6e4f778a6d7f..e21612cc221c 100644 --- a/.github/workflows/ci-short.yml +++ b/.github/workflows/ci-short.yml @@ -196,6 +196,7 @@ jobs: # Test example with "variables" and output - name: advection run: | + nvidia-smi cmake --build build -t advection-example cd build ctest -R regression_mpi_test:output_hdf5 diff --git a/cmake/machinecfg/GitHubActions.cmake b/cmake/machinecfg/GitHubActions.cmake index adb3b018aa6a..4457b93132f6 100644 --- a/cmake/machinecfg/GitHubActions.cmake +++ b/cmake/machinecfg/GitHubActions.cmake @@ -19,7 +19,7 @@ message(STATUS "Loading machine configuration for GitHub Actions CI. 
") # common options set(NUM_MPI_PROC_TESTING "2" CACHE STRING "CI runs tests with 2 MPI ranks") -set(Kokkos_ENABLE_ZEN3 ON CACHE BOOL "Enable Zen3") +set(Kokkos_ARCH_ZEN3 ON CACHE BOOL "Enable Zen3") set(MACHINE_CXX_FLAGS "") if (${MACHINE_VARIANT} MATCHES "cuda") From 2811e271eead6ddcb4fbc653078eeac4e7e798e7 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Wed, 10 Jul 2024 16:46:28 +0200 Subject: [PATCH 4/4] Try different cmdline param --- .github/workflows/ci-short.yml | 2 +- cmake/machinecfg/GitHubActions.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml index e21612cc221c..90c357986780 100644 --- a/.github/workflows/ci-short.yml +++ b/.github/workflows/ci-short.yml @@ -178,7 +178,7 @@ jobs: container: image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent # Map to local user id on CI machine to allow writing to build cache - options: --user 1000 --runtime=nvidia + options: --user 1000 --gpus all env: CMAKE_GENERATOR: Ninja CMAKE_BUILD_PARALLEL_LEVEL: 8 # num threads for build diff --git a/cmake/machinecfg/GitHubActions.cmake b/cmake/machinecfg/GitHubActions.cmake index 4457b93132f6..b539ec4fe44d 100644 --- a/cmake/machinecfg/GitHubActions.cmake +++ b/cmake/machinecfg/GitHubActions.cmake @@ -19,7 +19,7 @@ message(STATUS "Loading machine configuration for GitHub Actions CI. ") # common options set(NUM_MPI_PROC_TESTING "2" CACHE STRING "CI runs tests with 2 MPI ranks") -set(Kokkos_ARCH_ZEN3 ON CACHE BOOL "Enable Zen3") +set(Kokkos_ARCH_ZEN2 ON CACHE BOOL "Enable Zen2") set(MACHINE_CXX_FLAGS "") if (${MACHINE_VARIANT} MATCHES "cuda")