Skip to content

Commit

Permalink
Bump rocm image and change arch for debug hip build
Browse files Browse the repository at this point in the history
  • Loading branch information
pgrete committed Nov 22, 2024
1 parent e8b7c65 commit 9c47692
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/check-compilers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
parallel: ['serial', 'mpi']
runs-on: ubuntu-latest
container:
image: ghcr.io/parthenon-hpc-lab/rocm5.4.3-mpi-hdf5
image: ghcr.io/parthenon-hpc-lab/rocm6.2-mpi-hdf5
env:
CMAKE_GENERATOR: Ninja
steps:
Expand All @@ -74,7 +74,11 @@ jobs:
-DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \
-DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} \
-DMACHINE_CFG=${PWD}/cmake/machinecfg/GitHubActions.cmake \
-DMACHINE_VARIANT=${{ matrix.device }}_${{ matrix.parallel }}
-DMACHINE_VARIANT=${{ matrix.device }}_${{ matrix.parallel }} \
-DKokkos_ARCH_AMD_GFX90A=ON -DKokkos_ARCH_NAVI1030=OFF
# The Kokkos arch flags above manually change the arch for this (debug)
# build, as the -O0 option causes a compiler issue for the Navi 1030 GPU at
# compile time, see https://github.com/parthenon-hpc-lab/parthenon/pull/1191#issuecomment-2492035364
# NOTE: this comment must stay after the command. A `#` line placed between
# backslash-continued arguments ends the command there, so any arguments
# following it would be run as a separate (failing) command.
- name: Build
run: |
cmake --build builddir --parallel 2
14 changes: 6 additions & 8 deletions scripts/docker/Dockerfile.hip-rocm
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rocm/dev-ubuntu-20.04:5.4.3
FROM rocm/dev-ubuntu-24.04:6.2

RUN apt-get clean && apt-get update -y && \
DEBIAN_FRONTEND="noninteractive" TZ=America/New_York apt-get install -y --no-install-recommends git python3-minimal libpython3-stdlib bc hwloc wget openssh-client python3-numpy python3-h5py python3-matplotlib lcov curl cmake ninja-build openmpi-bin libopenmpi-dev && \
Expand All @@ -14,12 +14,10 @@ RUN cd /tmp && \
cd / && \
rm -rf /tmp/hdf5-1.10.8*

# "mpic++ --showme" forgets open-pal in Ubuntu 20.04 + OpenMPI 4.0.3
# https://bugs.launchpad.net/ubuntu/+source/openmpi/+bug/1941786
# https://github.com/open-mpi/ompi/issues/9317
ENV LDFLAGS="-lopen-pal"

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10

# uid 1000 maps to the one running the container on the CI host
RUN useradd --create-home --shell /bin/bash -u 1000 -G render ci
# The latest image has a default user with uid 1000 (which maps to the one running the container on the CI host).
# That user needs to be added to the group that can access the GPU.
RUN usermod -a -G render ubuntu

WORKDIR /home/ubuntu

0 comments on commit 9c47692

Please sign in to comment.